src/Pure/Thy/rail.ML
author wenzelm
Sat, 30 Apr 2011 20:48:29 +0200
changeset 43377 876887b07e8d
parent 43375 869c3f6f2d6e
child 43378 651aef3cc854
permissions -rw-r--r--
more robust error handling (NB: Source.source requires total scanner or recover);
tuned;
     1 (*  Title:      Pure/Thy/rail.ML
     2     Author:     Michael Kerscher, TU München
     3     Author:     Makarius
     4 
     5 Railroad diagrams in LaTeX.
     6 *)
     7 
     8 structure Rail: sig end =
     9 struct
    10 
    11 (** lexical syntax **)
    12 
    13 (* datatype token *)
    14 
    15 datatype kind = Keyword | Ident | String | EOF;
    16 
    17 datatype token = Token of Position.range * (kind * string);
    18 
    19 fun pos_of (Token ((pos, _), _)) = pos;
    20 fun end_pos_of (Token ((_, pos), _)) = pos;
    21 
    22 fun kind_of (Token (_, (k, _))) = k;
    23 fun content_of (Token (_, (_, x))) = x;
    24 
    25 
    26 (* diagnostics *)
    27 
    28 val print_kind =
    29  fn Keyword => "rail keyword"
    30   | Ident => "identifier"
    31   | String => "single-quoted string"
    32   | EOF => "end-of-file";
    33 
    34 fun print (Token ((pos, _), (k, x))) =
    35   (if k = EOF then print_kind k else print_kind k ^ " " ^ quote x) ^
    36   Position.str_of pos;
    37 
    38 fun print_keyword x = print_kind Keyword ^ " " ^ quote x;
    39 
    40 
    41 (* stopper *)
    42 
    43 fun mk_eof pos = Token ((pos, Position.none), (EOF, ""));
    44 val eof = mk_eof Position.none;
    45 
    46 fun is_eof (Token (_, (EOF, _))) = true
    47   | is_eof _ = false;
    48 
    49 val stopper =
    50   Scan.stopper (fn [] => eof | toks => mk_eof (end_pos_of (List.last toks))) is_eof;
    51 
    52 
    53 (* tokenize *)
    54 
    55 local
    56 
    57 fun token k ss = [Token (Symbol_Pos.range ss, (k, Symbol_Pos.content ss))];
    58 
    59 val scan_space = Scan.many1 (Symbol.is_blank o Symbol_Pos.symbol);
    60 
    61 val scan_keyword =
    62   Scan.one (member (op =) ["|", "*", "+", "?", "(", ")", "\\", ";", ":"] o Symbol_Pos.symbol);
    63 
    64 val scan_token =
    65   scan_space >> K [] ||
    66   scan_keyword >> (token Keyword o single) ||
    67   Lexicon.scan_id >> token Ident ||
    68   Symbol_Pos.scan_string_q >> (token String o #1 o #2);
    69 
    70 val scan =
    71   (Scan.repeat scan_token >> flat) --|
    72     Symbol_Pos.!!! "Rail lexical error: bad input"
    73       (Scan.ahead (Scan.one Symbol_Pos.is_eof));
    74 
    75 in
    76 
    77 fun tokenize text =
    78   #1 (Scan.error (Scan.finite Symbol_Pos.stopper scan) (Symbol_Pos.explode text));
    79 
    80 end;
    81 
    82 
    83 
    84 (** parsing **)
    85 
    86 fun !!! scan =
    87   let
    88     val prefix = "Rail syntax error";
    89 
    90     fun get_pos [] = " (past end-of-file!)"
    91       | get_pos (tok :: _) = Position.str_of (pos_of tok);
    92 
    93     fun err (toks, NONE) = prefix ^ get_pos toks
    94       | err (toks, SOME msg) =
    95           if String.isPrefix prefix msg then msg
    96           else prefix ^ get_pos toks ^ ": " ^ msg;
    97   in Scan.!! err scan end;
    98 
    99 fun $$$ x =
   100   Scan.one (fn tok => kind_of tok = Keyword andalso content_of tok = x) ||
   101   Scan.fail_with
   102     (fn [] => print_keyword x ^ " expected (past end-of-file!)"
   103       | tok :: _ => print_keyword x ^ " expected,\nbut " ^ print tok ^ " was found");
   104 
   105 fun enum1 sep scan = scan ::: Scan.repeat ($$$ sep |-- !!! scan);
   106 fun enum sep scan = enum1 sep scan || Scan.succeed [];
   107 
   108 fun parse_token kind =
   109   Scan.some (fn tok => if kind_of tok = kind then SOME (content_of tok) else NONE);
   110 
   111 val ident = parse_token Ident;
   112 val string = parse_token String;
   113 
   114 
   115 
   116 (** rail expressions **)
   117 
   118 (* datatype *)
   119 
   120 datatype rails =
   121   Cat of int * rail list
   122 and rail =
   123   Bar of rails list |
   124   Plus of rails * rails |
   125   Newline of int |
   126   Nonterminal of string |
   127   Terminal of string;
   128 
   129 fun reverse_cat (Cat (y, rails)) = Cat (y, rev (map reverse rails))
   130 and reverse (Bar cats) = Bar (map reverse_cat cats)
   131   | reverse (Plus (cat1, cat2)) = Plus (reverse_cat cat1, reverse_cat cat2)
   132   | reverse x = x;
   133 
   134 fun cat rails = Cat (0, rails);
   135 
   136 val empty = cat [];
   137 fun is_empty (Cat (_, [])) = true | is_empty _ = false;
   138 
   139 fun is_newline (Newline _) = true | is_newline _ = false;
   140 
   141 fun bar [Cat (_, [rail])] = rail
   142   | bar cats = Bar cats;
   143 
   144 fun plus cat1 cat2 = Plus (cat1, reverse_cat cat2);
   145 
   146 fun star cat1 cat2 =
   147   if is_empty cat2 then plus empty cat1
   148   else bar [empty, cat [plus cat1 cat2]];
   149 
   150 fun maybe rail = bar [empty, cat [rail]];
   151 
   152 
   153 (* read *)
   154 
   155 local
   156 
   157 fun body x = (enum1 "|" body1 >> bar) x
   158 and body0 x = (enum "|" body1 >> bar) x
   159 and body1 x =
   160  (body2 :|-- (fn a =>
   161    $$$ "*" |-- !!! body4e >> (cat o single o star a) ||
   162    $$$ "+" |-- !!! body4e >> (cat o single o plus a) ||
   163    Scan.succeed a)) x
   164 and body2 x = (Scan.repeat1 body3 >> cat) x
   165 and body3 x = (body4 :|-- (fn a => $$$ "?" >> K (maybe a) || Scan.succeed a)) x
   166 and body4 x =
   167  ($$$ "(" |-- !!! (body0 --| $$$ ")") ||
   168   $$$ "\\" >> K (Newline 0) ||
   169   ident >> Nonterminal ||
   170   string >> Terminal) x
   171 and body4e x = (Scan.option body4 >> (cat o the_list)) x;
   172 
   173 val rule = ident -- ($$$ ":" |-- !!! body) || body >> pair "";
   174 val rules = enum1 ";" (Scan.option rule) >> map_filter I;
   175 
   176 in
   177 
   178 fun read text =
   179   (case Scan.error (Scan.finite stopper rules) (tokenize text) of
   180     (res, []) => res
   181   | (_, tok :: _) => error ("Malformed rail input: " ^ print tok));
   182 
   183 end;
   184 
   185 
   186 (* latex output *)
   187 
   188 local
   189 
   190 fun vertical_range_cat (Cat (_, rails)) y =
   191   let val (rails', (_, y')) =
   192     fold_map (fn rail => fn (y0, y') =>
   193       if is_newline rail then (Newline (y' + 1), (y' + 1, y' + 2))
   194       else
   195         let val (rail', y0') = vertical_range rail y0;
   196         in (rail', (y0, Int.max (y0', y'))) end) rails (y, y + 1)
   197   in (Cat (y, rails'), y') end
   198 
   199 and vertical_range (Bar cats) y =
   200       let val (cats', y') = fold_map vertical_range_cat cats y
   201       in (Bar cats', Int.max (y + 1, y')) end
   202   | vertical_range (Plus (cat1, cat2)) y =
   203       let val ([cat1', cat2'], y') = fold_map vertical_range_cat [cat1, cat2] y;
   204       in (Plus (cat1', cat2'), Int.max (y + 1, y')) end
   205   | vertical_range (Newline _) y = (Newline (y + 2), y + 3)
   206   | vertical_range atom y = (atom, y + 1);
   207 
   208 
   209 fun output_text s = "\\isa{" ^ Output.output s ^ "}";
   210 
   211 fun output_cat c (Cat (_, rails)) = outputs c rails
   212 and outputs c [rail] = output c rail
   213   | outputs _ rails = implode (map (output "") rails)
   214 and output _ (Bar []) = ""
   215   | output c (Bar [cat]) = output_cat c cat
   216   | output _ (Bar (cat :: cats)) =
   217       "\\rail@bar\n" ^ output_cat "" cat ^
   218       implode (map (fn Cat (y, rails) =>
   219           "\\rail@nextbar{" ^ string_of_int y ^ "}\n" ^ outputs "" rails) cats) ^
   220       "\\rail@endbar\n"
   221   | output c (Plus (cat, Cat (y, rails))) =
   222       "\\rail@plus\n" ^ output_cat c cat ^
   223       "\\rail@nextplus{" ^ string_of_int y ^ "}\n" ^ outputs "c" rails ^
   224       "\\rail@endplus\n"
   225   | output _ (Newline y) = "\\rail@cr{" ^ string_of_int y ^ "}\n"
   226   | output c (Nonterminal s) = "\\rail@" ^ c ^ "nont{" ^ output_text s ^ "}[]\n"
   227   | output c (Terminal s) = "\\rail@" ^ c ^ "term{" ^ output_text s ^ "}[]\n";
   228 
   229 fun output_rule (name, rail) =
   230   let val (rail', y') = vertical_range rail 0 in
   231     "\\rail@begin{" ^ string_of_int y' ^ "}{" ^ output_text name ^ "}\n" ^
   232     output "" rail' ^
   233     "\\rail@end\n"
   234   end;
   235 
   236 fun output_rules rules =
   237   "\\begin{railoutput}\n" ^
   238   implode (map output_rule rules) ^
   239   "\\end{railoutput}\n";
   240 
   241 in
   242 
   243 val _ =
   244   Thy_Output.antiquotation "rail" (Scan.lift (Parse.position Args.name))
   245     (fn _ => output_rules o read);
   246 
   247 end;
   248 
   249 end;
   250