1 (* Title: Pure/General/symbol.ML
2 Author: Markus Wenzel, TU Muenchen
4 Generalized characters with infinitely many named symbols.
16 val spaces: int -> string
17 val is_char: symbol -> bool
18 val is_symbolic: symbol -> bool
19 val is_printable: symbol -> bool
20 val is_utf8_trailer: symbol -> bool
21 val name_of: symbol -> string
23 val is_eof: symbol -> bool
24 val not_eof: symbol -> bool
25 val stopper: symbol Scan.stopper
27 val is_sync: symbol -> bool
29 val end_malformed: symbol
30 val separate_chars: string -> string
31 val is_regular: symbol -> bool
32 val is_ascii: symbol -> bool
33 val is_ascii_letter: symbol -> bool
34 val is_ascii_digit: symbol -> bool
35 val is_ascii_hex: symbol -> bool
36 val is_ascii_quasi: symbol -> bool
37 val is_ascii_blank: symbol -> bool
38 val is_ascii_lower: symbol -> bool
39 val is_ascii_upper: symbol -> bool
40 val to_ascii_lower: symbol -> symbol
41 val to_ascii_upper: symbol -> symbol
42 val is_raw: symbol -> bool
43 val decode_raw: symbol -> string
44 val encode_raw: string -> string
45 datatype sym = Char of string | Sym of string | Ctrl of string | Raw of string
46 val decode: symbol -> sym
47 datatype kind = Letter | Digit | Quasi | Blank | Other
48 val kind: symbol -> kind
49 val is_letter: symbol -> bool
50 val is_digit: symbol -> bool
51 val is_quasi: symbol -> bool
52 val is_blank: symbol -> bool
53 val is_quasi_letter: symbol -> bool
54 val is_letdig: symbol -> bool
55 val is_ident: symbol list -> bool
56 val beginning: int -> symbol list -> string
57 val scanner: string -> (string list -> 'a * string list) -> symbol list -> 'a
58 val scan_id: string list -> string * string list
59 val source: {do_recover: bool} -> (string, 'a) Source.source ->
60 (symbol, (string, 'a) Source.source) Source.source
61 val explode: string -> symbol list
62 val escape: string -> string
63 val strip_blanks: string -> string
64 val bump_init: string -> string
65 val bump_string: string -> string
66 val length: symbol list -> int
68 val output: string -> output * int
71 structure Symbol: SYMBOL =
76 (*Symbols, which are considered the smallest entities of any Isabelle
77 string, may be of the following form:
80 (2) regular symbols: \<ident>
81 (3) control symbols: \<^ident>
82 (4) raw control symbols: \<^raw:...>, where "..." may be any printable
83 character (excluding ".", ">"), or \<^raw000>
85 Output is subject to the print_mode variable (default: verbatim),
86 actual interpretation in display is up to front-end tools.
100 val small_spaces = Vector.tabulate (65, fn i => Library.replicate_string i space);
103 if k < 64 then Vector.sub (small_spaces, k)
104 else Library.replicate_string (k div 64) (Vector.sub (small_spaces, 64)) ^
105 Vector.sub (small_spaces, k mod 64);
108 fun is_char s = size s = 1;
111 String.isPrefix "\\<" s andalso not (String.isPrefix "\\<^" s);
114 if is_char s then ord space <= ord s andalso ord s <= ord "~"
115 else not (String.isPrefix "\\<^" s);
117 fun is_utf8_trailer s = is_char s andalso 128 <= ord s andalso ord s < 192;
120 (* input source control *)
123 fun is_eof s = s = eof;
124 fun not_eof s = s <> eof;
125 val stopper = Scan.stopper (K eof) is_eof;
127 val sync = "\\<^sync>";
128 fun is_sync s = s = sync;
130 val malformed = "[[";
131 val end_malformed = "]]";
133 val separate_chars = explode #> space_implode space;
134 fun malformed_msg s = "Malformed symbolic character: " ^ quote (separate_chars s);
137 not_eof s andalso s <> sync andalso s <> malformed andalso s <> end_malformed;
139 fun name_of s = if is_symbolic s
140 then (unsuffix ">" o unprefix "\\<") s
141 else error (malformed_msg s);
146 fun is_ascii s = is_char s andalso ord s < 128;
148 fun is_ascii_letter s =
150 (ord "A" <= ord s andalso ord s <= ord "Z" orelse
151 ord "a" <= ord s andalso ord s <= ord "z");
153 fun is_ascii_digit s =
154 is_char s andalso ord "0" <= ord s andalso ord s <= ord "9";
158 (ord "0" <= ord s andalso ord s <= ord "9" orelse
159 ord "A" <= ord s andalso ord s <= ord "F" orelse
160 ord "a" <= ord s andalso ord s <= ord "f");
162 fun is_ascii_quasi "_" = true
163 | is_ascii_quasi "'" = true
164 | is_ascii_quasi _ = false;
167 fn " " => true | "\t" => true | "\n" => true | "\^K" => true | "\^L" => true | "\^M" => true
170 fun is_ascii_lower s = is_char s andalso (ord "a" <= ord s andalso ord s <= ord "z");
171 fun is_ascii_upper s = is_char s andalso (ord "A" <= ord s andalso ord s <= ord "Z");
173 fun to_ascii_lower s = if is_ascii_upper s then chr (ord s + ord "a" - ord "A") else s;
174 fun to_ascii_upper s = if is_ascii_lower s then chr (ord s + ord "A" - ord "a") else s;
180 ord space <= ord c andalso ord c <= ord "~" andalso c <> "." andalso c <> ">"
183 fun encode_raw "" = ""
186 val raw0 = enclose "\\<^raw:" ">";
187 val raw1 = raw0 o implode;
188 val raw2 = enclose "\\<^raw" ">" o string_of_int o ord;
190 fun encode cs = enc (Library.take_prefix raw_chr cs)
191 and enc ([], []) = []
192 | enc (cs, []) = [raw1 cs]
193 | enc ([], d :: ds) = raw2 d :: encode ds
194 | enc (cs, d :: ds) = raw1 cs :: raw2 d :: encode ds;
196 if exists_string (not o raw_chr) str then implode (encode (explode str))
205 val drop_blanks = #1 o Library.take_suffix is_ascii_blank;
206 val all_cs = drop_blanks cs;
207 val dots = if length all_cs > n then " ..." else "";
209 (drop_blanks (Library.take (n, all_cs))
210 |> map (fn c => if is_ascii_blank c then space else c)
218 String.isPrefix "\\<^raw" s andalso String.isSuffix ">" s;
221 if not (is_raw s) then error (malformed_msg s)
222 else if String.isPrefix "\\<^raw:" s then String.substring (s, 7, size s - 8)
223 else chr (#1 (Library.read_int (explode (String.substring (s, 6, size s - 7)))));
226 (* symbol variants *)
228 datatype sym = Char of string | Sym of string | Ctrl of string | Raw of string;
231 if is_char s then Char s
232 else if is_raw s then Raw (decode_raw s)
233 else if String.isPrefix "\\<^" s then Ctrl (String.substring (s, 3, size s - 4))
234 else if String.isPrefix "\\<" s then Sym (String.substring (s, 2, size s - 3))
235 else error (malformed_msg s);
238 (* standard symbol kinds *)
240 datatype kind = Letter | Digit | Quasi | Blank | Other;
243 val symbol_kinds = Symtab.make
348 ("\\<alpha>", Letter),
349 ("\\<beta>", Letter),
350 ("\\<gamma>", Letter),
351 ("\\<delta>", Letter),
352 ("\\<epsilon>", Letter),
353 ("\\<zeta>", Letter),
355 ("\\<theta>", Letter),
356 ("\\<iota>", Letter),
357 ("\\<kappa>", Letter),
358 ("\\<lambda>", Other), (*sic!*)
364 ("\\<sigma>", Letter),
366 ("\\<upsilon>", Letter),
370 ("\\<omega>", Letter),
371 ("\\<Gamma>", Letter),
372 ("\\<Delta>", Letter),
373 ("\\<Theta>", Letter),
374 ("\\<Lambda>", Letter),
377 ("\\<Sigma>", Letter),
378 ("\\<Upsilon>", Letter),
381 ("\\<Omega>", Letter),
382 ("\\<^isub>", Letter),
383 ("\\<^isup>", Letter),
384 ("\\<spacespace>", Blank)];
387 if is_ascii_letter s then Letter
388 else if is_ascii_digit s then Digit
389 else if is_ascii_quasi s then Quasi
390 else if is_ascii_blank s then Blank
391 else if is_char s then Other
392 else the_default Other (Symtab.lookup symbol_kinds s);
395 fun is_letter s = kind s = Letter;
396 fun is_digit s = kind s = Digit;
397 fun is_quasi s = kind s = Quasi;
398 fun is_blank s = kind s = Blank;
400 fun is_quasi_letter s = let val k = kind s in k = Letter orelse k = Quasi end;
401 fun is_letdig s = let val k = kind s in k = Letter orelse k = Digit orelse k = Quasi end;
403 fun is_ident [] = false
404 | is_ident (c :: cs) = is_letter c andalso forall is_letdig cs;
410 (* scanning through symbols *)
412 fun scanner msg scan chs =
414 fun message (cs, NONE) = msg ^ ": " ^ quote (beginning 10 cs)
415 | message (cs, SOME msg') = msg ^ ", " ^ msg' ^ ": " ^ quote (beginning 10 cs);
416 val fin_scan = Scan.error (Scan.finite stopper (!! message scan));
418 (case fin_scan chs of
419 (result, []) => result
420 | (_, rest) => error (message (rest, NONE)))
423 val scan_id = Scan.one is_letter ^^ (Scan.many is_letdig >> implode);
430 fun is_plain s = s <> "\^M" andalso s <> "\\" andalso not_eof s;
432 val scan_encoded_newline =
433 $$ "\^M" -- $$ "\n" >> K "\n" ||
434 $$ "\^M" >> K "\n" ||
435 $$ "\\" -- Scan.optional ($$ "\\") "" -- Scan.this_string "<^newline>" >> K "\n";
438 Scan.this_string "raw:" ^^ (Scan.many raw_chr >> implode) ||
439 Scan.this_string "raw" ^^ (Scan.many1 is_ascii_digit >> implode);
443 scan_encoded_newline ||
444 (($$ "\\" --| Scan.optional ($$ "\\") "") ^^ $$ "<" ^^
445 !! (fn (cs, _) => malformed_msg (beginning 10 ("\\" :: "<" :: cs)))
446 (($$ "^" ^^ (scan_raw || scan_id) || scan_id) ^^ $$ ">")) ||
450 Scan.one is_ascii_blank || $$ "\"" || $$ "`" || $$ "\\" ||
451 Scan.this_string "(*" || Scan.this_string "*)" ||
452 Scan.this_string "{*" || Scan.this_string "*}";
455 (Scan.this (explode "\\\\<") || Scan.this (explode "\\<")) @@@
456 Scan.repeat (Scan.unless scan_resync (Scan.one not_eof))
457 >> (fn ss => malformed :: ss @ [end_malformed]);
461 fun source {do_recover} src =
462 Source.source stopper (Scan.bulk scan)
463 (if do_recover then SOME (false, K recover) else NONE) src;
472 fun no_explode [] = true
473 | no_explode ("\\" :: "<" :: _) = false
474 | no_explode ("\^M" :: _) = false
475 | no_explode (_ :: cs) = no_explode cs;
479 fun sym_explode str =
480 let val chs = explode str in
481 if no_explode chs then chs
482 else Source.exhaust (source {do_recover = false} (Source.of_list chs))
490 val escape = implode o map (fn s => if is_char s then s else "\\" ^ s) o sym_explode;
497 |> Library.take_prefix is_blank |> #2
498 |> Library.take_suffix is_blank |> #1
502 (* bump string -- treat as base 26 or base 1 numbers *)
504 fun symbolic_end (_ :: "\\<^isub>" :: _) = true
505 | symbolic_end (_ :: "\\<^isup>" :: _) = true
506 | symbolic_end (s :: _) = is_symbolic s
507 | symbolic_end [] = false;
510 if symbolic_end (rev (sym_explode str)) then str ^ "'"
513 fun bump_string str =
516 | bump ("z" :: ss) = "a" :: bump ss
518 if is_char s andalso ord "a" <= ord s andalso ord s < ord "z"
519 then chr (ord s + 1) :: ss
522 val (ss, qs) = apfst rev (Library.take_suffix is_quasi (sym_explode str));
523 val ss' = if symbolic_end ss then "'" :: ss else bump ss;
524 in implode (rev ss' @ qs) end;
528 (** symbol output **)
533 if not (is_printable s) then (0: int)
534 else if String.isPrefix "\\<long" s then 2
535 else if String.isPrefix "\\<Long" s then 2
536 else if s = "\\<spacespace>" then 2
539 fun sym_length ss = fold (fn s => fn n => sym_len s + n) ss 0;
544 val xsymbolsN = "xsymbols";
546 fun output s = (s, sym_length (sym_explode s));
549 (*final declarations of this structure!*)
550 val explode = sym_explode;
551 val length = sym_length;