src/Pure/General/symbol.ML
author wenzelm
Sat, 05 Jun 2004 13:07:19 +0200
changeset 14873 7efc14398e82
parent 14834 e47744683560
child 14908 944087c00932
permissions -rw-r--r--
added datatype sym, val decode: symbol -> sym;
     1 (*  Title:      Pure/General/symbol.ML
     2     ID:         $Id$
     3     Author:     Markus Wenzel, TU Muenchen
     4     License:    GPL (GNU GENERAL PUBLIC LICENSE)
     5 
     6 Generalized characters with and infinite amount of named symbols.
     7 *)
     8 
     9 signature SYMBOL =
    10 sig
    11   type symbol
    12   val space: symbol
    13   val spaces: int -> symbol
    14   val is_char: symbol -> bool
    15   val is_symbolic: symbol -> bool
    16   val is_printable: symbol -> bool
    17   val eof: symbol
    18   val is_eof: symbol -> bool
    19   val not_eof: symbol -> bool
    20   val stopper: symbol * (symbol -> bool)
    21   val sync: symbol
    22   val is_sync: symbol -> bool
    23   val not_sync: symbol -> bool
    24   val malformed: symbol
    25   val is_ascii: symbol -> bool
    26   val is_ascii_letter: symbol -> bool
    27   val is_ascii_digit: symbol -> bool
    28   val is_ascii_quasi: symbol -> bool
    29   val is_ascii_blank: symbol -> bool
    30   val is_raw: symbol -> bool
    31   val decode_raw: symbol -> string
    32   val encode_raw: string -> symbol list
    33   datatype sym = Char of string | Sym of string | Ctrl of string | Raw of string
    34   exception DECODE of string
    35   val decode: symbol -> sym
    36   datatype kind = Letter | Digit | Quasi | Blank | Other
    37   val kind: symbol -> kind
    38   val is_letter: symbol -> bool
    39   val is_digit: symbol -> bool
    40   val is_quasi: symbol -> bool
    41   val is_blank: symbol -> bool
    42   val is_quasi_letter: symbol -> bool
    43   val is_letdig: symbol -> bool
    44   val beginning: int -> symbol list -> string
    45   val scanner: string -> (string list -> 'a * string list) -> symbol list -> 'a
    46   val scan_id: string list -> string * string list
    47   val scan: string list -> symbol * string list
    48   val source: bool -> (string, 'a) Source.source ->
    49     (symbol, (string, 'a) Source.source) Source.source
    50   val explode: string -> symbol list
    51   val strip_blanks: string -> string
    52   val bump_init: string -> string
    53   val bump_string: string -> string
    54   val length: symbol list -> int
    55   val plain_output: string -> string
    56   val default_output: string -> string * real
    57   val default_indent: string * int -> string
    58   val default_raw: string -> string
    59   val symbolsN: string
    60   val xsymbolsN: string
    61 end;
    62 
    63 structure Symbol: SYMBOL =
    64 struct
    65 
    66 (** type symbol **)
    67 
    68 (*Symbols, which are considered the smallest entities of any Isabelle
    69   string, may be of the following form:
    70 
    71     (1) ASCII symbols: a
    72     (2) printable symbols: \<ident>
    73     (3) control symbols: \<^ident>
    74     (4) raw control symbols: \<^raw:...>, where "..." may be any printable
    75         character excluding ">", or \<^rawNNN> where NNN are digits
    76 
    77   Output is subject to the print_mode variable (default: verbatim),
    78   actual interpretation in display is up to front-end tools.
    79 
    80   Proper symbols may optionally start with "\\" instead of just "\"
    81   for compatibility with ML string literals (e.g. used in old-style
    82   theory files and ML proof scripts).  To be on the safe side, the
    83   default output of these symbols will start with the double "\\".
    84 *)
    85 
    86 type symbol = string;
    87 
    88 val space = " ";
    89 fun spaces k = Library.replicate_string k space;
    90 
    91 fun is_char s = size s = 1;
    92 
    93 fun is_symbolic s =
    94   String.isPrefix "\\<" s andalso not (String.isPrefix "\\<^" s);
    95 
    96 fun is_printable s =
    97   if is_char s then ord space <= ord s andalso ord s <= ord "~"
    98   else not (String.isPrefix "\\<^" s);
    99 
   100 
   101 (* input source control *)
   102 
   103 val eof = "";
   104 fun is_eof s = s = eof;
   105 fun not_eof s = s <> eof;
   106 val stopper = (eof, is_eof);
   107 
   108 val sync = "\\<^sync>";
   109 fun is_sync s = s = sync;
   110 fun not_sync s = s <> sync;
   111 
   112 val malformed = "\\<^malformed>";
   113 
   114 
   115 (* ascii symbols *)
   116 
   117 fun is_ascii s = is_char s andalso ord s < 128;
   118 
   119 fun is_ascii_letter s =
   120   is_char s andalso
   121    (ord "A" <= ord s andalso ord s <= ord "Z" orelse
   122     ord "a" <= ord s andalso ord s <= ord "z");
   123 
   124 fun is_ascii_digit s =
   125   is_char s andalso ord "0" <= ord s andalso ord s <= ord "9";
   126 
   127 fun is_ascii_quasi "_" = true
   128   | is_ascii_quasi "'" = true
   129   | is_ascii_quasi _ = false;
   130 
   131 val is_ascii_blank =
   132   fn " " => true | "\t" => true | "\r" => true | "\n" => true | "\^L" => true
   133     | _ => false;
   134 
   135 
   136 (* raw symbols *)
   137 
   138 fun is_raw s =
   139   String.isPrefix "\\<^raw" s andalso String.substring (s, size s - 1, 1) = ">";
   140 
   141 fun decode_raw s =
   142   if not (is_raw s) then "*** BAD RAW OUTPUT " ^ s ^ " ***"
   143   else if String.isPrefix "\\<^raw:" s then String.substring (s, 7, size s - 8)
   144   else chr (#1 (Library.read_int (explode (String.substring (s, 6, size s - 7)))));
   145 
   146 local
   147 
   148 fun raw_chr c = ord space <= ord c andalso ord c <= ord "~" andalso c <> ">";
   149 
   150 val raw1 = enclose "\\<^raw:" ">" o implode;
   151 val raw2 = enclose "\\<^raw" ">" o string_of_int o ord;
   152 
   153 fun encode cs = enc (take_prefix raw_chr cs)
   154 and enc ([], []) = []
   155   | enc (cs, []) = [raw1 cs]
   156   | enc ([], d :: ds) = raw2 d :: encode ds
   157   | enc (cs, d :: ds) = raw1 cs :: raw2 d :: encode ds;
   158 
   159 in
   160 
   161 val scan_raw =
   162  (Scan.this (explode "raw:") -- Scan.any raw_chr ||
   163   Scan.this (explode "raw") -- Scan.any1 is_ascii_digit) >> (implode o op @);
   164 
   165 fun encode_raw s =
   166   if exists_string (not o raw_chr) s then encode (explode s)
   167   else [enclose "\\<^raw:" ">" s];
   168 
   169 end;
   170 
   171 
   172 (* symbol variants *)
   173 
   174 datatype sym = Char of string | Sym of string | Ctrl of string | Raw of string;
   175 
   176 exception DECODE of string;
   177 
   178 fun decode s =
   179   if is_char s then Char s
   180   else if is_raw s then Raw (decode_raw s)
   181   else if String.isPrefix "\\<^" s then Ctrl (String.substring (s, 3, size s - 4))
   182   else if String.isPrefix "\\<" s then Sym (String.substring (s, 2, size s - 3))
   183   else raise DECODE s;   (*NB: no error message in order to avoid looping in output!*)
   184 
   185 
   186 (* standard symbol kinds *)
   187 
   188 datatype kind = Letter | Digit | Quasi | Blank | Other;
   189 
   190 local
   191   val symbol_kinds = Symtab.make
   192    [("\\<A>", Letter),
   193     ("\\<B>", Letter),
   194     ("\\<C>", Letter),
   195     ("\\<D>", Letter),
   196     ("\\<E>", Letter),
   197     ("\\<F>", Letter),
   198     ("\\<G>", Letter),
   199     ("\\<H>", Letter),
   200     ("\\<I>", Letter),
   201     ("\\<J>", Letter),
   202     ("\\<K>", Letter),
   203     ("\\<L>", Letter),
   204     ("\\<M>", Letter),
   205     ("\\<N>", Letter),
   206     ("\\<O>", Letter),
   207     ("\\<P>", Letter),
   208     ("\\<Q>", Letter),
   209     ("\\<R>", Letter),
   210     ("\\<S>", Letter),
   211     ("\\<T>", Letter),
   212     ("\\<U>", Letter),
   213     ("\\<V>", Letter),
   214     ("\\<W>", Letter),
   215     ("\\<X>", Letter),
   216     ("\\<Y>", Letter),
   217     ("\\<Z>", Letter),
   218     ("\\<a>", Letter),
   219     ("\\<b>", Letter),
   220     ("\\<c>", Letter),
   221     ("\\<d>", Letter),
   222     ("\\<e>", Letter),
   223     ("\\<f>", Letter),
   224     ("\\<g>", Letter),
   225     ("\\<h>", Letter),
   226     ("\\<i>", Letter),
   227     ("\\<j>", Letter),
   228     ("\\<k>", Letter),
   229     ("\\<l>", Letter),
   230     ("\\<m>", Letter),
   231     ("\\<n>", Letter),
   232     ("\\<o>", Letter),
   233     ("\\<p>", Letter),
   234     ("\\<q>", Letter),
   235     ("\\<r>", Letter),
   236     ("\\<s>", Letter),
   237     ("\\<t>", Letter),
   238     ("\\<u>", Letter),
   239     ("\\<v>", Letter),
   240     ("\\<w>", Letter),
   241     ("\\<x>", Letter),
   242     ("\\<y>", Letter),
   243     ("\\<z>", Letter),
   244     ("\\<AA>", Letter),
   245     ("\\<BB>", Letter),
   246     ("\\<CC>", Letter),
   247     ("\\<DD>", Letter),
   248     ("\\<EE>", Letter),
   249     ("\\<FF>", Letter),
   250     ("\\<GG>", Letter),
   251     ("\\<HH>", Letter),
   252     ("\\<II>", Letter),
   253     ("\\<JJ>", Letter),
   254     ("\\<KK>", Letter),
   255     ("\\<LL>", Letter),
   256     ("\\<MM>", Letter),
   257     ("\\<NN>", Letter),
   258     ("\\<OO>", Letter),
   259     ("\\<PP>", Letter),
   260     ("\\<QQ>", Letter),
   261     ("\\<RR>", Letter),
   262     ("\\<SS>", Letter),
   263     ("\\<TT>", Letter),
   264     ("\\<UU>", Letter),
   265     ("\\<VV>", Letter),
   266     ("\\<WW>", Letter),
   267     ("\\<XX>", Letter),
   268     ("\\<YY>", Letter),
   269     ("\\<ZZ>", Letter),
   270     ("\\<aa>", Letter),
   271     ("\\<bb>", Letter),
   272     ("\\<cc>", Letter),
   273     ("\\<dd>", Letter),
   274     ("\\<ee>", Letter),
   275     ("\\<ff>", Letter),
   276     ("\\<gg>", Letter),
   277     ("\\<hh>", Letter),
   278     ("\\<ii>", Letter),
   279     ("\\<jj>", Letter),
   280     ("\\<kk>", Letter),
   281     ("\\<ll>", Letter),
   282     ("\\<mm>", Letter),
   283     ("\\<nn>", Letter),
   284     ("\\<oo>", Letter),
   285     ("\\<pp>", Letter),
   286     ("\\<qq>", Letter),
   287     ("\\<rr>", Letter),
   288     ("\\<ss>", Letter),
   289     ("\\<tt>", Letter),
   290     ("\\<uu>", Letter),
   291     ("\\<vv>", Letter),
   292     ("\\<ww>", Letter),
   293     ("\\<xx>", Letter),
   294     ("\\<yy>", Letter),
   295     ("\\<zz>", Letter),
   296     ("\\<alpha>", Letter),
   297     ("\\<beta>", Letter),
   298     ("\\<gamma>", Letter),
   299     ("\\<delta>", Letter),
   300     ("\\<epsilon>", Letter),
   301     ("\\<zeta>", Letter),
   302     ("\\<eta>", Letter),
   303     ("\\<theta>", Letter),
   304     ("\\<iota>", Letter),
   305     ("\\<kappa>", Letter),
   306     ("\\<lambda>", Other),      (*sic!*)
   307     ("\\<mu>", Letter),
   308     ("\\<nu>", Letter),
   309     ("\\<xi>", Letter),
   310     ("\\<pi>", Letter),
   311     ("\\<rho>", Letter),
   312     ("\\<sigma>", Letter),
   313     ("\\<tau>", Letter),
   314     ("\\<upsilon>", Letter),
   315     ("\\<phi>", Letter),
   316     ("\\<psi>", Letter),
   317     ("\\<omega>", Letter),
   318     ("\\<Gamma>", Letter),
   319     ("\\<Delta>", Letter),
   320     ("\\<Theta>", Letter),
   321     ("\\<Lambda>", Letter),
   322     ("\\<Xi>", Letter),
   323     ("\\<Pi>", Letter),
   324     ("\\<Sigma>", Letter),
   325     ("\\<Upsilon>", Letter),
   326     ("\\<Phi>", Letter),
   327     ("\\<Psi>", Letter),
   328     ("\\<Omega>", Letter),
   329     ("\\<^isub>", Quasi),
   330     ("\\<^isup>", Quasi),
   331     ("\\<spacespace>", Blank)];
   332 in
   333   fun kind s =
   334     if is_ascii_letter s then Letter
   335     else if is_ascii_digit s then Digit
   336     else if is_ascii_quasi s then Quasi
   337     else if is_ascii_blank s then Blank
   338     else if is_char s then Other
   339     else if_none (Symtab.lookup (symbol_kinds, s)) Other;
   340 end;
   341 
   342 fun is_letter s = kind s = Letter;
   343 fun is_digit s = kind s = Digit;
   344 fun is_quasi s = kind s = Quasi;
   345 fun is_blank s = kind s = Blank;
   346 
   347 fun is_quasi_letter s = let val k = kind s in k = Letter orelse k = Quasi end;
   348 fun is_letdig s = let val k = kind s in k = Letter orelse k = Digit orelse k = Quasi end;
   349 
   350 
   351 
   352 (** symbol input **)
   353 
   354 (* scanning through symbols *)
   355 
   356 fun beginning n raw_ss =
   357   let
   358     val (all_ss, _) = Library.take_suffix is_blank raw_ss;
   359     val dots = if length all_ss > n then " ..." else "";
   360     val (ss, _) = Library.take_suffix is_blank (Library.take (n, all_ss));
   361   in implode (map (fn s => if is_blank s then space else s) ss) ^ dots end;
   362 
   363 fun scanner msg scan chs =
   364   let
   365     fun err_msg cs = msg ^ ": " ^ beginning 10 cs;
   366     val fin_scan = Scan.error (Scan.finite stopper (!! (fn (cs, _) => err_msg cs) scan));
   367   in
   368     (case fin_scan chs of
   369       (result, []) => result
   370     | (_, rest) => error (err_msg rest))
   371   end;
   372 
   373 
   374 (* scan *)
   375 
   376 val scan_id = Scan.one is_letter ^^ (Scan.any is_letdig >> implode);
   377 
   378 local
   379 
   380 val scan_encoded_newline =
   381   $$ "\r" -- $$ "\n" >> K "\n" ||
   382   $$ "\r" >> K "\n" ||
   383   Scan.optional ($$ "\\") "" -- Scan.this (explode "\\<^newline>") >> K "\n";
   384 
   385 in
   386 
   387 val scan =
   388   scan_encoded_newline ||
   389   ($$ "\\" --| Scan.optional ($$ "\\") "") ^^ $$ "<" ^^
   390     !! (fn (cs, _) => "Malformed symbolic character specification: \\" ^ "<" ^ beginning 10 cs)
   391        (($$ "^" ^^ (scan_raw || scan_id) || scan_id) ^^ $$ ">") ||
   392   Scan.one not_eof;
   393 
   394 end;
   395 
   396 
   397 (* source *)
   398 
   399 val recover = Scan.any ((not o is_blank) andf not_eof) >> K [malformed];
   400 
   401 fun source do_recover src =
   402   Source.source stopper (Scan.bulk scan) (if do_recover then Some recover else None) src;
   403 
   404 
   405 (* explode *)
   406 
   407 fun no_explode [] = true
   408   | no_explode ("\\" :: "<" :: _) = false
   409   | no_explode ("\r" :: _) = false
   410   | no_explode (_ :: cs) = no_explode cs;
   411 
   412 fun sym_explode str =
   413   let val chs = explode str in
   414     if no_explode chs then chs
   415     else the (Scan.read stopper (Scan.repeat scan) chs)
   416   end;
   417 
   418 
   419 (* blanks *)
   420 
   421 fun strip_blanks s =
   422   sym_explode s
   423   |> Library.take_prefix is_blank |> #2
   424   |> Library.take_suffix is_blank |> #1
   425   |> implode;
   426 
   427 
   428 (* bump string -- treat as base 26 or base 1 numbers *)
   429 
   430 fun ends_symbolic (_ :: "\\<^isup>" :: _) = true
   431   | ends_symbolic (_ :: "\\<^isub>" :: _) = true
   432   | ends_symbolic (s :: _) = is_symbolic s
   433   | ends_symbolic [] = false;
   434 
   435 fun bump_init str =
   436   if ends_symbolic (rev (sym_explode str)) then str ^ "'"
   437   else str ^ "a";
   438 
   439 fun bump_string str =
   440   let
   441     fun bump [] = ["a"]
   442       | bump ("z" :: ss) = "a" :: bump ss
   443       | bump (s :: ss) =
   444           if is_char s andalso ord "a" <= ord s andalso ord s < ord "z"
   445           then chr (ord s + 1) :: ss
   446           else "a" :: s :: ss;
   447 
   448     val (ss, qs) = apfst rev (Library.take_suffix is_quasi (sym_explode str));
   449     val ss' = if ends_symbolic ss then "'" :: ss else bump ss;
   450   in implode (rev ss' @ qs) end;
   451 
   452 
   453 
   454 (** symbol output **)
   455 
   456 fun sym_len s =
   457   if not (is_printable s) then 0
   458   else if String.isPrefix "\\<long" s then 2
   459   else if String.isPrefix "\\<Long" s then 2
   460   else if s = "\\<spacespace>" then 2
   461   else 1;
   462 
   463 fun sym_length ss = foldl (fn (n, s) => sym_len s + n) (0, ss);
   464 
   465 
   466 (* default output *)
   467 
   468 local
   469 
   470 fun string_size s = (s, real (size s));
   471 
   472 fun sym_output s =
   473   if is_char s then s
   474   else if is_raw s then decode_raw s
   475   else "\\" ^ s;
   476 
   477 in
   478 
   479 fun default_output s =
   480   if not (exists_string (equal "\\") s) then string_size s
   481   else string_size (implode (map sym_output (sym_explode s)));
   482 
   483 end;
   484 
   485 val plain_output = #1 o default_output;
   486 
   487 fun default_indent (_: string, k) = spaces k;
   488 val default_raw = implode o encode_raw;
   489 
   490 val _ = Output.add_mode "" (default_output, default_indent, default_raw);
   491 
   492 
   493 (* print modes *)
   494 
   495 val symbolsN = "symbols";
   496 val xsymbolsN = "xsymbols";
   497 
   498 
   499 (*final declarations of this structure!*)
   500 val length = sym_length;
   501 val explode = sym_explode;
   502 
   503 end;