1 (* Title: Pure/General/symbol.ML
3 Author: Markus Wenzel, TU Muenchen
4 License: GPL (GNU GENERAL PUBLIC LICENSE)
6 Generalized characters with and infinite amount of named symbols.
13 val spaces: int -> symbol
14 val is_char: symbol -> bool
15 val is_symbolic: symbol -> bool
16 val is_printable: symbol -> bool
18 val is_eof: symbol -> bool
19 val not_eof: symbol -> bool
20 val stopper: symbol * (symbol -> bool)
22 val is_sync: symbol -> bool
23 val not_sync: symbol -> bool
25 val is_ascii: symbol -> bool
26 val is_ascii_letter: symbol -> bool
27 val is_ascii_digit: symbol -> bool
28 val is_ascii_quasi: symbol -> bool
29 val is_ascii_blank: symbol -> bool
30 val is_raw: symbol -> bool
31 val decode_raw: symbol -> string
32 val encode_raw: string -> symbol list
33 datatype sym = Char of string | Sym of string | Ctrl of string | Raw of string
34 exception DECODE of string
35 val decode: symbol -> sym
36 datatype kind = Letter | Digit | Quasi | Blank | Other
37 val kind: symbol -> kind
38 val is_letter: symbol -> bool
39 val is_digit: symbol -> bool
40 val is_quasi: symbol -> bool
41 val is_blank: symbol -> bool
42 val is_quasi_letter: symbol -> bool
43 val is_letdig: symbol -> bool
44 val beginning: int -> symbol list -> string
45 val scanner: string -> (string list -> 'a * string list) -> symbol list -> 'a
46 val scan_id: string list -> string * string list
47 val scan: string list -> symbol * string list
48 val source: bool -> (string, 'a) Source.source ->
49 (symbol, (string, 'a) Source.source) Source.source
50 val explode: string -> symbol list
51 val strip_blanks: string -> string
52 val bump_init: string -> string
53 val bump_string: string -> string
54 val length: symbol list -> int
55 val plain_output: string -> string
56 val default_output: string -> string * real
57 val default_indent: string * int -> string
58 val default_raw: string -> string
63 structure Symbol: SYMBOL =
68 (*Symbols, which are considered the smallest entities of any Isabelle
69 string, may be of the following form:
72 (2) printable symbols: \<ident>
73 (3) control symbols: \<^ident>
74 (4) raw control symbols: \<^raw:...>, where "..." may be any printable
75 character excluding ">", or \<^rawNNN> where NNN are digits
77 Output is subject to the print_mode variable (default: verbatim),
78 actual interpretation in display is up to front-end tools.
80 Proper symbols may optionally start with "\\" instead of just "\"
81 for compatibility with ML string literals (e.g. used in old-style
82 theory files and ML proof scripts). To be on the safe side, the
83 default output of these symbols will start with the double "\\".
89 fun spaces k = Library.replicate_string k space;
91 fun is_char s = size s = 1;
94 String.isPrefix "\\<" s andalso not (String.isPrefix "\\<^" s);
97 if is_char s then ord space <= ord s andalso ord s <= ord "~"
98 else not (String.isPrefix "\\<^" s);
101 (* input source control *)
104 fun is_eof s = s = eof;
105 fun not_eof s = s <> eof;
106 val stopper = (eof, is_eof);
108 val sync = "\\<^sync>";
109 fun is_sync s = s = sync;
110 fun not_sync s = s <> sync;
112 val malformed = "\\<^malformed>";
117 fun is_ascii s = is_char s andalso ord s < 128;
119 fun is_ascii_letter s =
121 (ord "A" <= ord s andalso ord s <= ord "Z" orelse
122 ord "a" <= ord s andalso ord s <= ord "z");
124 fun is_ascii_digit s =
125 is_char s andalso ord "0" <= ord s andalso ord s <= ord "9";
127 fun is_ascii_quasi "_" = true
128 | is_ascii_quasi "'" = true
129 | is_ascii_quasi _ = false;
132 fn " " => true | "\t" => true | "\r" => true | "\n" => true | "\^L" => true
139 String.isPrefix "\\<^raw" s andalso String.substring (s, size s - 1, 1) = ">";
142 if not (is_raw s) then "*** BAD RAW OUTPUT " ^ s ^ " ***"
143 else if String.isPrefix "\\<^raw:" s then String.substring (s, 7, size s - 8)
144 else chr (#1 (Library.read_int (explode (String.substring (s, 6, size s - 7)))));
148 fun raw_chr c = ord space <= ord c andalso ord c <= ord "~" andalso c <> ">";
150 val raw1 = enclose "\\<^raw:" ">" o implode;
151 val raw2 = enclose "\\<^raw" ">" o string_of_int o ord;
153 fun encode cs = enc (take_prefix raw_chr cs)
154 and enc ([], []) = []
155 | enc (cs, []) = [raw1 cs]
156 | enc ([], d :: ds) = raw2 d :: encode ds
157 | enc (cs, d :: ds) = raw1 cs :: raw2 d :: encode ds;
162 (Scan.this (explode "raw:") -- Scan.any raw_chr ||
163 Scan.this (explode "raw") -- Scan.any1 is_ascii_digit) >> (implode o op @);
166 if exists_string (not o raw_chr) s then encode (explode s)
167 else [enclose "\\<^raw:" ">" s];
172 (* symbol variants *)
174 datatype sym = Char of string | Sym of string | Ctrl of string | Raw of string;
176 exception DECODE of string;
179 if is_char s then Char s
180 else if is_raw s then Raw (decode_raw s)
181 else if String.isPrefix "\\<^" s then Ctrl (String.substring (s, 3, size s - 4))
182 else if String.isPrefix "\\<" s then Sym (String.substring (s, 2, size s - 3))
183 else raise DECODE s; (*NB: no error message in order to avoid looping in output!*)
186 (* standard symbol kinds *)
188 datatype kind = Letter | Digit | Quasi | Blank | Other;
191 val symbol_kinds = Symtab.make
296 ("\\<alpha>", Letter),
297 ("\\<beta>", Letter),
298 ("\\<gamma>", Letter),
299 ("\\<delta>", Letter),
300 ("\\<epsilon>", Letter),
301 ("\\<zeta>", Letter),
303 ("\\<theta>", Letter),
304 ("\\<iota>", Letter),
305 ("\\<kappa>", Letter),
306 ("\\<lambda>", Other), (*sic!*)
312 ("\\<sigma>", Letter),
314 ("\\<upsilon>", Letter),
317 ("\\<omega>", Letter),
318 ("\\<Gamma>", Letter),
319 ("\\<Delta>", Letter),
320 ("\\<Theta>", Letter),
321 ("\\<Lambda>", Letter),
324 ("\\<Sigma>", Letter),
325 ("\\<Upsilon>", Letter),
328 ("\\<Omega>", Letter),
329 ("\\<^isub>", Quasi),
330 ("\\<^isup>", Quasi),
331 ("\\<spacespace>", Blank)];
334 if is_ascii_letter s then Letter
335 else if is_ascii_digit s then Digit
336 else if is_ascii_quasi s then Quasi
337 else if is_ascii_blank s then Blank
338 else if is_char s then Other
339 else if_none (Symtab.lookup (symbol_kinds, s)) Other;
342 fun is_letter s = kind s = Letter;
343 fun is_digit s = kind s = Digit;
344 fun is_quasi s = kind s = Quasi;
345 fun is_blank s = kind s = Blank;
347 fun is_quasi_letter s = let val k = kind s in k = Letter orelse k = Quasi end;
348 fun is_letdig s = let val k = kind s in k = Letter orelse k = Digit orelse k = Quasi end;
354 (* scanning through symbols *)
356 fun beginning n raw_ss =
358 val (all_ss, _) = Library.take_suffix is_blank raw_ss;
359 val dots = if length all_ss > n then " ..." else "";
360 val (ss, _) = Library.take_suffix is_blank (Library.take (n, all_ss));
361 in implode (map (fn s => if is_blank s then space else s) ss) ^ dots end;
363 fun scanner msg scan chs =
365 fun err_msg cs = msg ^ ": " ^ beginning 10 cs;
366 val fin_scan = Scan.error (Scan.finite stopper (!! (fn (cs, _) => err_msg cs) scan));
368 (case fin_scan chs of
369 (result, []) => result
370 | (_, rest) => error (err_msg rest))
376 val scan_id = Scan.one is_letter ^^ (Scan.any is_letdig >> implode);
380 val scan_encoded_newline =
381 $$ "\r" -- $$ "\n" >> K "\n" ||
383 Scan.optional ($$ "\\") "" -- Scan.this (explode "\\<^newline>") >> K "\n";
388 scan_encoded_newline ||
389 ($$ "\\" --| Scan.optional ($$ "\\") "") ^^ $$ "<" ^^
390 !! (fn (cs, _) => "Malformed symbolic character specification: \\" ^ "<" ^ beginning 10 cs)
391 (($$ "^" ^^ (scan_raw || scan_id) || scan_id) ^^ $$ ">") ||
399 val recover = Scan.any ((not o is_blank) andf not_eof) >> K [malformed];
401 fun source do_recover src =
402 Source.source stopper (Scan.bulk scan) (if do_recover then Some recover else None) src;
407 fun no_explode [] = true
408 | no_explode ("\\" :: "<" :: _) = false
409 | no_explode ("\r" :: _) = false
410 | no_explode (_ :: cs) = no_explode cs;
412 fun sym_explode str =
413 let val chs = explode str in
414 if no_explode chs then chs
415 else the (Scan.read stopper (Scan.repeat scan) chs)
423 |> Library.take_prefix is_blank |> #2
424 |> Library.take_suffix is_blank |> #1
428 (* bump string -- treat as base 26 or base 1 numbers *)
430 fun ends_symbolic (_ :: "\\<^isup>" :: _) = true
431 | ends_symbolic (_ :: "\\<^isub>" :: _) = true
432 | ends_symbolic (s :: _) = is_symbolic s
433 | ends_symbolic [] = false;
436 if ends_symbolic (rev (sym_explode str)) then str ^ "'"
439 fun bump_string str =
442 | bump ("z" :: ss) = "a" :: bump ss
444 if is_char s andalso ord "a" <= ord s andalso ord s < ord "z"
445 then chr (ord s + 1) :: ss
448 val (ss, qs) = apfst rev (Library.take_suffix is_quasi (sym_explode str));
449 val ss' = if ends_symbolic ss then "'" :: ss else bump ss;
450 in implode (rev ss' @ qs) end;
454 (** symbol output **)
457 if not (is_printable s) then 0
458 else if String.isPrefix "\\<long" s then 2
459 else if String.isPrefix "\\<Long" s then 2
460 else if s = "\\<spacespace>" then 2
463 fun sym_length ss = foldl (fn (n, s) => sym_len s + n) (0, ss);
470 fun string_size s = (s, real (size s));
474 else if is_raw s then decode_raw s
479 fun default_output s =
480 if not (exists_string (equal "\\") s) then string_size s
481 else string_size (implode (map sym_output (sym_explode s)));
485 val plain_output = #1 o default_output;
487 fun default_indent (_: string, k) = spaces k;
488 val default_raw = implode o encode_raw;
490 val _ = Output.add_mode "" (default_output, default_indent, default_raw);
495 val symbolsN = "symbols";
496 val xsymbolsN = "xsymbols";
499 (*final declarations of this structure!*)
500 val length = sym_length;
501 val explode = sym_explode;