src/Pure/Thy/rail.ML
author wenzelm
Sat, 23 Jul 2011 16:37:17 +0200
changeset 44818 9b00f09f7721
parent 44450 9864182c6bad
child 49779 4fe0920d5049
permissions -rw-r--r--
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
     1 (*  Title:      Pure/Thy/rail.ML
     2     Author:     Michael Kerscher, TU München
     3     Author:     Makarius
     4 
     5 Railroad diagrams in LaTeX.
     6 *)
     7 
     8 structure Rail: sig end =
     9 struct
    10 
    11 (** lexical syntax **)
    12 
    13 (* datatype token *)
    14 
    15 datatype kind =
    16   Keyword | Ident | String | Antiq of Symbol_Pos.T list * Position.range | EOF;
    17 
    18 datatype token = Token of Position.range * (kind * string);
    19 
    20 fun pos_of (Token ((pos, _), _)) = pos;
    21 fun end_pos_of (Token ((_, pos), _)) = pos;
    22 
    23 fun kind_of (Token (_, (k, _))) = k;
    24 fun content_of (Token (_, (_, x))) = x;
    25 
    26 
    27 (* diagnostics *)
    28 
    29 val print_kind =
    30  fn Keyword => "rail keyword"
    31   | Ident => "identifier"
    32   | String => "single-quoted string"
    33   | Antiq _ => "antiquotation"
    34   | EOF => "end-of-file";
    35 
    36 fun print (Token ((pos, _), (k, x))) =
    37   (if k = EOF then print_kind k else print_kind k ^ " " ^ quote x) ^
    38   Position.str_of pos;
    39 
    40 fun print_keyword x = print_kind Keyword ^ " " ^ quote x;
    41 
    42 
    43 (* stopper *)
    44 
    45 fun mk_eof pos = Token ((pos, Position.none), (EOF, ""));
    46 val eof = mk_eof Position.none;
    47 
    48 fun is_eof (Token (_, (EOF, _))) = true
    49   | is_eof _ = false;
    50 
    51 val stopper =
    52   Scan.stopper (fn [] => eof | toks => mk_eof (end_pos_of (List.last toks))) is_eof;
    53 
    54 
    55 (* tokenize *)
    56 
    57 local
    58 
    59 fun token k ss = [Token (Symbol_Pos.range ss, (k, Symbol_Pos.content ss))];
    60 
    61 val scan_space = Scan.many1 (Symbol.is_blank o Symbol_Pos.symbol);
    62 
    63 val scan_keyword =
    64   Scan.one (member (op =) ["|", "*", "+", "?", "(", ")", "\\", ";", ":", "@"] o Symbol_Pos.symbol);
    65 
    66 val scan_token =
    67   scan_space >> K [] ||
    68   Antiquote.scan_antiq >> (fn antiq as (ss, _) => token (Antiq antiq) ss) ||
    69   scan_keyword >> (token Keyword o single) ||
    70   Lexicon.scan_id >> token Ident ||
    71   Symbol_Pos.scan_string_q >> (token String o #1 o #2);
    72 
    73 val scan =
    74   (Scan.repeat scan_token >> flat) --|
    75     Symbol_Pos.!!! (fn () => "Rail lexical error: bad input")
    76       (Scan.ahead (Scan.one Symbol_Pos.is_eof));
    77 
    78 in
    79 
    80 val tokenize = #1 o Scan.error (Scan.finite Symbol_Pos.stopper scan) o Symbol_Pos.explode;
    81 
    82 end;
    83 
    84 
    85 
    86 (** parsing **)
    87 
    88 fun !!! scan =
    89   let
    90     val prefix = "Rail syntax error";
    91 
    92     fun get_pos [] = " (past end-of-file!)"
    93       | get_pos (tok :: _) = Position.str_of (pos_of tok);
    94 
    95     fun err (toks, NONE) = (fn () => prefix ^ get_pos toks)
    96       | err (toks, SOME msg) =
    97           (fn () =>
    98             let val s = msg () in
    99               if String.isPrefix prefix s then s
   100               else prefix ^ get_pos toks ^ ": " ^ s
   101             end);
   102   in Scan.!! err scan end;
   103 
   104 fun $$$ x =
   105   Scan.one (fn tok => kind_of tok = Keyword andalso content_of tok = x) ||
   106   Scan.fail_with
   107     (fn [] => (fn () => print_keyword x ^ " expected (past end-of-file!)")
   108       | tok :: _ => (fn () => print_keyword x ^ " expected,\nbut " ^ print tok ^ " was found"));
   109 
   110 fun enum1 sep scan = scan ::: Scan.repeat ($$$ sep |-- !!! scan);
   111 fun enum sep scan = enum1 sep scan || Scan.succeed [];
   112 
   113 val ident = Scan.some (fn tok => if kind_of tok = Ident then SOME (content_of tok) else NONE);
   114 val string = Scan.some (fn tok => if kind_of tok = String then SOME (content_of tok) else NONE);
   115 
   116 val antiq = Scan.some (fn tok => (case kind_of tok of Antiq a => SOME a | _ => NONE));
   117 
   118 
   119 
   120 (** rail expressions **)
   121 
   122 (* datatype *)
   123 
   124 datatype rails =
   125   Cat of int * rail list
   126 and rail =
   127   Bar of rails list |
   128   Plus of rails * rails |
   129   Newline of int |
   130   Nonterminal of string |
   131   Terminal of bool * string |
   132   Antiquote of bool * (Symbol_Pos.T list * Position.range);
   133 
   134 fun reverse_cat (Cat (y, rails)) = Cat (y, rev (map reverse rails))
   135 and reverse (Bar cats) = Bar (map reverse_cat cats)
   136   | reverse (Plus (cat1, cat2)) = Plus (reverse_cat cat1, reverse_cat cat2)
   137   | reverse x = x;
   138 
   139 fun cat rails = Cat (0, rails);
   140 
   141 val empty = cat [];
   142 fun is_empty (Cat (_, [])) = true | is_empty _ = false;
   143 
   144 fun is_newline (Newline _) = true | is_newline _ = false;
   145 
   146 fun bar [Cat (_, [rail])] = rail
   147   | bar cats = Bar cats;
   148 
   149 fun plus cat1 cat2 = Plus (cat1, reverse_cat cat2);
   150 
   151 fun star cat1 cat2 =
   152   if is_empty cat2 then plus empty cat1
   153   else bar [empty, cat [plus cat1 cat2]];
   154 
   155 fun maybe rail = bar [empty, cat [rail]];
   156 
   157 
   158 (* read *)
   159 
   160 local
   161 
   162 val at_mode = Scan.option ($$$ "@") >> (fn NONE => false | _ => true);
   163 
   164 fun body x = (enum1 "|" body1 >> bar) x
   165 and body0 x = (enum "|" body1 >> bar) x
   166 and body1 x =
   167  (body2 :|-- (fn a =>
   168    $$$ "*" |-- !!! body4e >> (cat o single o star a) ||
   169    $$$ "+" |-- !!! body4e >> (cat o single o plus a) ||
   170    Scan.succeed a)) x
   171 and body2 x = (Scan.repeat1 body3 >> cat) x
   172 and body3 x = (body4 :|-- (fn a => $$$ "?" >> K (maybe a) || Scan.succeed a)) x
   173 and body4 x =
   174  ($$$ "(" |-- !!! (body0 --| $$$ ")") ||
   175   $$$ "\\" >> K (Newline 0) ||
   176   ident >> Nonterminal ||
   177   at_mode -- string >> Terminal ||
   178   at_mode -- antiq >> Antiquote) x
   179 and body4e x = (Scan.option body4 >> (cat o the_list)) x;
   180 
   181 val rule_name = ident >> Antiquote.Text || antiq >> Antiquote.Antiq;
   182 val rule = rule_name -- ($$$ ":" |-- !!! body) || body >> pair (Antiquote.Text "");
   183 val rules = enum1 ";" (Scan.option rule) >> map_filter I;
   184 
   185 in
   186 
   187 val read =
   188   #1 o Scan.error (Scan.finite stopper (rules --| !!! (Scan.ahead (Scan.one is_eof)))) o tokenize;
   189 
   190 end;
   191 
   192 
   193 (* latex output *)
   194 
   195 local
   196 
   197 fun vertical_range_cat (Cat (_, rails)) y =
   198   let val (rails', (_, y')) =
   199     fold_map (fn rail => fn (y0, y') =>
   200       if is_newline rail then (Newline (y' + 1), (y' + 1, y' + 2))
   201       else
   202         let val (rail', y0') = vertical_range rail y0;
   203         in (rail', (y0, Int.max (y0', y'))) end) rails (y, y + 1)
   204   in (Cat (y, rails'), y') end
   205 
   206 and vertical_range (Bar cats) y =
   207       let val (cats', y') = fold_map vertical_range_cat cats y
   208       in (Bar cats', Int.max (y + 1, y')) end
   209   | vertical_range (Plus (cat1, cat2)) y =
   210       let val ([cat1', cat2'], y') = fold_map vertical_range_cat [cat1, cat2] y;
   211       in (Plus (cat1', cat2'), Int.max (y + 1, y')) end
   212   | vertical_range (Newline _) y = (Newline (y + 2), y + 3)
   213   | vertical_range atom y = (atom, y + 1);
   214 
   215 fun output_rules state rules =
   216   let
   217     val output_antiq = Thy_Output.eval_antiq (#1 (Keyword.get_lexicons ())) state;
   218     fun output_text b s =
   219       Output.output s
   220       |> b ? enclose "\\isakeyword{" "}"
   221       |> enclose "\\isa{" "}";
   222 
   223     fun output_cat c (Cat (_, rails)) = outputs c rails
   224     and outputs c [rail] = output c rail
   225       | outputs _ rails = implode (map (output "") rails)
   226     and output _ (Bar []) = ""
   227       | output c (Bar [cat]) = output_cat c cat
   228       | output _ (Bar (cat :: cats)) =
   229           "\\rail@bar\n" ^ output_cat "" cat ^
   230           implode (map (fn Cat (y, rails) =>
   231               "\\rail@nextbar{" ^ string_of_int y ^ "}\n" ^ outputs "" rails) cats) ^
   232           "\\rail@endbar\n"
   233       | output c (Plus (cat, Cat (y, rails))) =
   234           "\\rail@plus\n" ^ output_cat c cat ^
   235           "\\rail@nextplus{" ^ string_of_int y ^ "}\n" ^ outputs "c" rails ^
   236           "\\rail@endplus\n"
   237       | output _ (Newline y) = "\\rail@cr{" ^ string_of_int y ^ "}\n"
   238       | output c (Nonterminal s) = "\\rail@" ^ c ^ "nont{" ^ output_text false s ^ "}[]\n"
   239       | output c (Terminal (b, s)) = "\\rail@" ^ c ^ "term{" ^ output_text b s ^ "}[]\n"
   240       | output c (Antiquote (b, a)) =
   241           "\\rail@" ^ c ^ (if b then "term{" else "nont{") ^ output_antiq a ^ "}[]\n";
   242 
   243     fun output_rule (name, rail) =
   244       let
   245         val (rail', y') = vertical_range rail 0;
   246         val out_name =
   247           (case name of
   248             Antiquote.Text "" => ""
   249           | Antiquote.Text s => output_text false s
   250           | Antiquote.Antiq a => output_antiq a);
   251       in
   252         "\\rail@begin{" ^ string_of_int y' ^ "}{" ^ out_name ^ "}\n" ^
   253         output "" rail' ^
   254         "\\rail@end\n"
   255       end;
   256   in
   257     "\\begin{railoutput}\n" ^
   258     implode (map output_rule rules) ^
   259     "\\end{railoutput}\n"
   260   end;
   261 
   262 in
   263 
   264 val _ =
   265   Context.>> (Context.map_theory
   266     (Thy_Output.antiquotation (Binding.name "rail")
   267       (Scan.lift (Parse.source_position Parse.string))
   268       (fn {state, ...} => output_rules state o read)));
   269 
   270 end;
   271 
   272 end;
   273