wenzelm@27763
|
1 |
(* Title: Pure/General/symbol_pos.ML
|
wenzelm@27763
|
2 |
ID: $Id$
|
wenzelm@27763
|
3 |
Author: Makarius
|
wenzelm@27763
|
4 |
|
wenzelm@27763
|
5 |
Symbols with explicit position information.
|
wenzelm@27763
|
6 |
*)
|
wenzelm@27763
|
7 |
|
wenzelm@27763
|
8 |
signature BASIC_SYMBOL_POS =
|
wenzelm@27763
|
9 |
sig
|
wenzelm@27763
|
10 |
type T = Symbol.symbol * Position.T
|
wenzelm@27763
|
11 |
val symbol: T -> Symbol.symbol
|
wenzelm@27763
|
12 |
val $$$ : Symbol.symbol -> T list -> T list * T list
|
wenzelm@27763
|
13 |
val ~$$$ : Symbol.symbol -> T list -> T list * T list
|
wenzelm@27763
|
14 |
end
|
wenzelm@27763
|
15 |
|
wenzelm@27763
|
16 |
signature SYMBOL_POS =
|
wenzelm@27763
|
17 |
sig
|
wenzelm@27763
|
18 |
include BASIC_SYMBOL_POS
|
wenzelm@27797
|
19 |
val content: T list -> string
|
wenzelm@27852
|
20 |
val untabify_content: T list -> string
|
wenzelm@27763
|
21 |
val is_eof: T -> bool
|
wenzelm@27763
|
22 |
val stopper: T Scan.stopper
|
wenzelm@27763
|
23 |
val !!! : string -> (T list -> 'a) -> T list -> 'a
|
wenzelm@27778
|
24 |
val scan_pos: T list -> Position.T * T list
|
wenzelm@27763
|
25 |
val scan_comment: (string -> (T list -> T list * T list) -> T list -> T list * T list) ->
|
wenzelm@27763
|
26 |
T list -> T list * T list
|
wenzelm@27763
|
27 |
val scan_comment_body: (string -> (T list -> T list * T list) -> T list -> T list * T list) ->
|
wenzelm@27763
|
28 |
T list -> T list * T list
|
wenzelm@27763
|
29 |
val source: Position.T -> (Symbol.symbol, 'a) Source.source ->
|
wenzelm@27763
|
30 |
(T, Position.T * (Symbol.symbol, 'a) Source.source) Source.source
|
wenzelm@27778
|
31 |
type text = string
|
wenzelm@27797
|
32 |
val implode: T list -> text
|
wenzelm@27797
|
33 |
val range: T list -> Position.range
|
wenzelm@27797
|
34 |
val implode_range: Position.T -> Position.T -> T list -> text * Position.range
|
wenzelm@27778
|
35 |
val explode: text * Position.T -> T list
|
wenzelm@27763
|
36 |
end;
|
wenzelm@27763
|
37 |
|
wenzelm@27763
|
38 |
structure SymbolPos: SYMBOL_POS =
|
wenzelm@27763
|
39 |
struct
|
wenzelm@27763
|
40 |
|
wenzelm@27763
|
41 |
(* type T *)
|
wenzelm@27763
|
42 |
|
wenzelm@27763
|
43 |
type T = Symbol.symbol * Position.T;
|
wenzelm@27763
|
44 |
|
wenzelm@27763
|
45 |
fun symbol ((s, _): T) = s;
|
wenzelm@27852
|
46 |
|
wenzelm@27852
|
47 |
|
wenzelm@27852
|
48 |
(* content *)
|
wenzelm@27852
|
49 |
|
wenzelm@27797
|
50 |
val content = implode o map symbol;
|
wenzelm@27763
|
51 |
|
wenzelm@27763
|
52 |
|
wenzelm@27864
|
53 |
val tab_width = (8: int);
|
wenzelm@27852
|
54 |
|
wenzelm@27852
|
55 |
fun untabify ("\t", pos) =
|
wenzelm@27852
|
56 |
(case Position.column_of pos of
|
wenzelm@27852
|
57 |
SOME n => Symbol.spaces (tab_width - ((n - 1) mod tab_width))
|
wenzelm@27852
|
58 |
| NONE => error "No column information -- cannot interpret tabulators")
|
wenzelm@27852
|
59 |
| untabify (s, _) = s;
|
wenzelm@27852
|
60 |
|
wenzelm@27852
|
61 |
val untabify_content = implode o map untabify;
|
wenzelm@27852
|
62 |
|
wenzelm@27852
|
63 |
|
wenzelm@27763
|
64 |
(* stopper *)
|
wenzelm@27763
|
65 |
|
wenzelm@27763
|
66 |
fun mk_eof pos = (Symbol.eof, pos);
|
wenzelm@27763
|
67 |
val eof = mk_eof Position.none;
|
wenzelm@27763
|
68 |
|
wenzelm@27763
|
69 |
val is_eof = Symbol.is_eof o symbol;
|
wenzelm@27763
|
70 |
|
wenzelm@27763
|
71 |
val stopper =
|
wenzelm@27763
|
72 |
Scan.stopper (fn [] => eof | inp => mk_eof (List.last inp |-> Position.advance)) is_eof;
|
wenzelm@27763
|
73 |
|
wenzelm@27763
|
74 |
|
wenzelm@27763
|
75 |
(* basic scanners *)
|
wenzelm@27763
|
76 |
|
wenzelm@27763
|
77 |
fun !!! text scan =
|
wenzelm@27763
|
78 |
let
|
wenzelm@27763
|
79 |
fun get_pos [] = " (past end-of-text!)"
|
wenzelm@27763
|
80 |
| get_pos ((_, pos) :: _) = Position.str_of pos;
|
wenzelm@27763
|
81 |
|
wenzelm@27763
|
82 |
fun err (syms, msg) =
|
wenzelm@27763
|
83 |
text ^ get_pos syms ^ " at " ^ Symbol.beginning 10 (map symbol syms) ^
|
wenzelm@27763
|
84 |
(case msg of NONE => "" | SOME s => "\n" ^ s);
|
wenzelm@27763
|
85 |
in Scan.!! err scan end;
|
wenzelm@27763
|
86 |
|
wenzelm@27763
|
87 |
fun $$$ s = Scan.one (fn x => symbol x = s) >> single;
|
wenzelm@27763
|
88 |
fun ~$$$ s = Scan.one (fn x => symbol x <> s) >> single;
|
wenzelm@27763
|
89 |
|
wenzelm@27778
|
90 |
val scan_pos = Scan.ahead (Scan.one (K true)) >> (fn (_, pos): T => pos);
|
wenzelm@27763
|
91 |
|
wenzelm@27763
|
92 |
|
wenzelm@27763
|
93 |
(* ML-style comments *)
|
wenzelm@27763
|
94 |
|
wenzelm@27763
|
95 |
local
|
wenzelm@27763
|
96 |
|
wenzelm@27763
|
97 |
val scan_cmt =
|
wenzelm@27763
|
98 |
Scan.depend (fn (d: int) => $$$ "(" @@@ $$$ "*" >> pair (d + 1)) ||
|
wenzelm@27763
|
99 |
Scan.depend (fn 0 => Scan.fail | d => $$$ "*" @@@ $$$ ")" >> pair (d - 1)) ||
|
wenzelm@27763
|
100 |
Scan.lift ($$$ "*" --| Scan.ahead (~$$$ ")")) ||
|
wenzelm@27763
|
101 |
Scan.lift (Scan.one (fn (s, _) => s <> "*" andalso Symbol.is_regular s)) >> single;
|
wenzelm@27763
|
102 |
|
wenzelm@27763
|
103 |
val scan_body = Scan.pass 0 (Scan.repeat scan_cmt >> flat);
|
wenzelm@27763
|
104 |
|
wenzelm@27763
|
105 |
in
|
wenzelm@27763
|
106 |
|
wenzelm@27763
|
107 |
fun scan_comment cut =
|
wenzelm@27763
|
108 |
$$$ "(" @@@ $$$ "*" @@@ cut "missing end of comment" (scan_body @@@ $$$ "*" @@@ $$$ ")");
|
wenzelm@27763
|
109 |
|
wenzelm@27763
|
110 |
fun scan_comment_body cut =
|
wenzelm@27763
|
111 |
$$$ "(" |-- $$$ "*" |-- cut "missing end of comment" (scan_body --| $$$ "*" --| $$$ ")");
|
wenzelm@27763
|
112 |
|
wenzelm@27763
|
113 |
end;
|
wenzelm@27763
|
114 |
|
wenzelm@27763
|
115 |
|
wenzelm@27763
|
116 |
(* source *)
|
wenzelm@27763
|
117 |
|
wenzelm@27763
|
118 |
fun source pos =
|
wenzelm@27763
|
119 |
Source.source' pos Symbol.stopper (Scan.bulk (Scan.depend (fn pos =>
|
wenzelm@27763
|
120 |
Scan.one Symbol.not_eof >> (fn s => (Position.advance s pos, (s, pos)))))) NONE;
|
wenzelm@27763
|
121 |
|
wenzelm@27763
|
122 |
|
wenzelm@27763
|
123 |
(* compact representation -- with Symbol.DEL padding *)
|
wenzelm@27763
|
124 |
|
wenzelm@27778
|
125 |
type text = string;
|
wenzelm@27778
|
126 |
|
wenzelm@27763
|
127 |
fun pad [] = []
|
wenzelm@27763
|
128 |
| pad [(s, _)] = [s]
|
wenzelm@27763
|
129 |
| pad ((s1, pos1) :: (rest as (s2, pos2) :: _)) =
|
wenzelm@27763
|
130 |
let
|
wenzelm@27763
|
131 |
val end_pos1 = Position.advance s1 pos1;
|
wenzelm@27797
|
132 |
val d = Int.max (0, Position.distance_of end_pos1 pos2);
|
wenzelm@27763
|
133 |
in s1 :: replicate d Symbol.DEL @ pad rest end;
|
wenzelm@27763
|
134 |
|
wenzelm@27797
|
135 |
val implode = implode o pad;
|
wenzelm@27763
|
136 |
|
wenzelm@27797
|
137 |
fun range (syms as (_, pos) :: _) =
|
wenzelm@27797
|
138 |
let val pos' = List.last syms |-> Position.advance
|
wenzelm@27797
|
139 |
in Position.range pos pos' end
|
wenzelm@27797
|
140 |
| range [] = Position.no_range;
|
wenzelm@27763
|
141 |
|
wenzelm@27797
|
142 |
fun implode_range pos1 pos2 syms =
|
wenzelm@27797
|
143 |
let val syms' = (("", pos1) :: syms @ [("", pos2)])
|
wenzelm@27797
|
144 |
in (implode syms', range syms') end;
|
wenzelm@27778
|
145 |
|
wenzelm@27763
|
146 |
fun explode (str, pos) =
|
wenzelm@27778
|
147 |
fold_map (fn s => fn p => ((s, p), (Position.advance s p)))
|
wenzelm@27778
|
148 |
(Symbol.explode str) (Position.reset_range pos)
|
wenzelm@27763
|
149 |
|> #1 |> filter_out (fn (s, _) => s = Symbol.DEL);
|
wenzelm@27763
|
150 |
|
wenzelm@27763
|
151 |
end;
|
wenzelm@27763
|
152 |
|
wenzelm@27763
|
153 |
structure BasicSymbolPos: BASIC_SYMBOL_POS = SymbolPos; (*not open by default*)
|
wenzelm@27763
|
154 |
|