1 (* Title: Pure/General/xml.ML
2 Author: David Aspinall, Stefan Berghofer and Markus Wenzel
7 signature XML_DATA_OPS =
15 val properties: Properties.T T
20 val pair: 'a T -> 'b T -> ('a * 'b) T
21 val triple: 'a T -> 'b T -> 'c T -> ('a * 'b * 'c) T
22 val list: 'a T -> 'a list T
23 val option: 'a T -> 'a option T
24 val variant: 'a V list -> 'a T
29 type attributes = Properties.T
31 Elem of Markup.T * tree list
34 val add_content: tree -> Buffer.T -> Buffer.T
35 val content_of: body -> string
37 val text: string -> string
38 val element: string -> attributes -> string list -> string
39 val output_markup: Markup.T -> Output.output * Output.output
40 val string_of: tree -> string
41 val pretty: int -> tree -> Pretty.T
42 val output: tree -> TextIO.outstream -> unit
43 val parse_comments: string list -> unit * string list
44 val parse_string : string -> string option
45 val parse_element: string list -> tree * string list
46 val parse_document: string list -> tree * string list
47 val parse: string -> tree
48 exception XML_ATOM of string
49 exception XML_BODY of body
50 structure Encode: XML_DATA_OPS
51 structure Decode: XML_DATA_OPS
59 type attributes = Properties.T;
62 Elem of Markup.T * tree list
65 type body = tree list;
67 fun add_content (Elem (_, ts)) = fold add_content ts
68 | add_content (Text s) = Buffer.add s;
70 fun content_of body = Buffer.empty |> fold add_content body |> Buffer.content;
74 (** string representation **)
76 val header = "<?xml version=\"1.0\"?>\n";
81 fun decode "<" = "<"
83 | decode "&" = "&"
84 | decode "'" = "'"
85 | decode """ = "\""
88 fun encode "<" = "<"
90 | encode "&" = "&"
91 | encode "'" = "'"
92 | encode "\"" = """
95 val text = translate_string encode;
101 space_implode " " (name :: map (fn (a, x) => a ^ "=\"" ^ text x ^ "\"") atts);
103 fun element name atts body =
104 let val b = implode body in
105 if b = "" then enclose "<" "/>" (elem name atts)
106 else enclose "<" ">" (elem name atts) ^ b ^ enclose "</" ">" name
109 fun output_markup (markup as (name, atts)) =
110 if Markup.is_empty markup then Markup.no_output
111 else (enclose "<" ">" (elem name atts), enclose "</" ">" name);
116 fun buffer_of depth tree =
118 fun traverse _ (Elem ((name, atts), [])) =
119 Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add "/>"
120 | traverse d (Elem ((name, atts), ts)) =
121 Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add ">" #>
122 traverse_body d ts #>
123 Buffer.add "</" #> Buffer.add name #> Buffer.add ">"
124 | traverse _ (Text s) = Buffer.add (text s)
125 and traverse_body 0 _ = Buffer.add "..."
126 | traverse_body d ts = fold (traverse (d - 1)) ts;
127 in Buffer.empty |> traverse depth tree end;
129 val string_of = Buffer.content o buffer_of ~1;
130 val output = Buffer.output o buffer_of ~1;
132 fun pretty depth tree =
133 Pretty.str (Buffer.content (buffer_of (Int.max (0, depth)) tree));
137 (** XML parsing (slow) **)
141 fun err msg (xs, _) =
142 fn () => "XML parsing error: " ^ msg () ^ "\nfound: " ^ quote (Symbol.beginning 100 xs);
146 val blanks = Scan.many Symbol.is_blank;
147 val special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode;
148 val regular = Scan.one Symbol.is_regular;
149 fun regular_except x = Scan.one (fn c => Symbol.is_regular c andalso c <> x);
151 val parse_chars = Scan.repeat1 (special || regular_except "<") >> implode;
154 Scan.this_string "<![CDATA[" |--
155 (Scan.repeat (Scan.unless (Scan.this_string "]]>") regular) >> implode) --|
156 Scan.this_string "]]>";
159 (Symbol.scan_id --| (blanks -- $$ "=" -- blanks)) --
160 (($$ "\"" || $$ "'") :|-- (fn s =>
161 (Scan.repeat (special || regular_except s) >> implode) --| $$ s));
164 Scan.this_string "<!--" --
165 Scan.repeat (Scan.unless (Scan.this_string "-->") regular) --
166 Scan.this_string "-->" >> ignored;
168 val parse_processing_instruction =
169 Scan.this_string "<?" --
170 Scan.repeat (Scan.unless (Scan.this_string "?>") regular) --
171 Scan.this_string "?>" >> ignored;
174 Scan.this_string "<!DOCTYPE" --
175 Scan.repeat (Scan.unless ($$ ">") regular) --
179 Scan.one Symbol.is_blank >> ignored ||
180 parse_processing_instruction ||
183 val parse_optional_text =
184 Scan.optional (parse_chars >> (single o Text)) [];
186 fun name_start_char c = Symbol.is_ascii_letter c orelse c = ":" orelse c = "_";
187 fun name_char c = name_start_char c orelse Symbol.is_ascii_digit c orelse c = "-" orelse c = ".";
188 val parse_name = Scan.one name_start_char ::: Scan.many name_char;
193 blanks -- Scan.repeat (parse_comment -- blanks >> K ()) >> K ();
195 val parse_string = Scan.read Symbol.stopper parse_chars o raw_explode;
197 fun parse_content xs =
198 (parse_optional_text @@@
200 ((parse_element >> single ||
201 parse_cdata >> (single o Text) ||
202 parse_processing_instruction ||
204 @@@ parse_optional_text) >> flat)) xs
206 and parse_element xs =
207 ($$ "<" |-- parse_name -- Scan.repeat (blanks |-- parse_att) --| blanks :--
209 !! (err (fn () => "Expected > or />"))
210 ($$ "/" -- $$ ">" >> ignored ||
211 $$ ">" |-- parse_content --|
212 !! (err (fn () => "Expected </" ^ implode name ^ ">"))
213 ($$ "<" -- $$ "/" -- Scan.this name -- blanks -- $$ ">")))
214 >> (fn ((name, atts), body) => Elem ((implode name, atts), body))) xs;
217 (Scan.repeat parse_misc -- Scan.option parse_doctype -- Scan.repeat parse_misc)
221 (case Scan.finite Symbol.stopper (Scan.error (!! (err (fn () => "Malformed element"))
222 (blanks |-- parse_document --| blanks))) (raw_explode s) of
224 | (_, ys) => error ("XML parsing error: Unprocessed input\n" ^ Symbol.beginning 100 ys));
230 (** XML as data representation language **)
232 exception XML_ATOM of string;
233 exception XML_BODY of tree list;
239 type 'a A = 'a -> string;
240 type 'a T = 'a -> body;
241 type 'a V = 'a -> string list * body;
246 fun int_atom i = signed_string_of_int i;
248 fun bool_atom false = "0"
249 | bool_atom true = "1";
251 fun unit_atom () = "";
254 (* structural nodes *)
256 fun node ts = Elem ((":", []), ts);
258 fun vector xs = map_index (fn (i, x) => (int_atom i, x)) xs;
260 fun tagged (tag, (xs, ts)) = Elem ((int_atom tag, vector xs), ts);
263 (* representation of standard types *)
265 fun properties props = [Elem ((":", props), [])];
268 | string s = [Text s];
270 val int = string o int_atom;
272 val bool = string o bool_atom;
274 val unit = string o unit_atom;
276 fun pair f g (x, y) = [node (f x), node (g y)];
278 fun triple f g h (x, y, z) = [node (f x), node (g y), node (h z)];
280 fun list f xs = map (node o f) xs;
282 fun option _ NONE = []
283 | option f (SOME x) = [node (f x)];
285 fun variant fs x = [tagged (the (get_index (fn f => try f x) fs))];
293 type 'a A = string -> 'a;
294 type 'a T = body -> 'a;
295 type 'a V = string list * body -> 'a;
302 handle Fail _ => raise XML_ATOM s;
304 fun bool_atom "0" = false
305 | bool_atom "1" = true
306 | bool_atom s = raise XML_ATOM s;
308 fun unit_atom "" = ()
309 | unit_atom s = raise XML_ATOM s;
312 (* structural nodes *)
314 fun node (Elem ((":", []), ts)) = ts
315 | node t = raise XML_BODY [t];
318 #1 (fold_map (fn (a, x) =>
319 fn i => if int_atom a = i then (x, i + 1) else raise XML_ATOM a) atts 0);
321 fun tagged (Elem ((name, atts), ts)) = (int_atom name, (vector atts, ts))
322 | tagged t = raise XML_BODY [t];
325 (* representation of standard types *)
327 fun properties [Elem ((":", props), [])] = props
328 | properties ts = raise XML_BODY ts;
331 | string [Text s] = s
332 | string ts = raise XML_BODY ts;
334 val int = int_atom o string;
336 val bool = bool_atom o string;
338 val unit = unit_atom o string;
340 fun pair f g [t1, t2] = (f (node t1), g (node t2))
341 | pair _ _ ts = raise XML_BODY ts;
343 fun triple f g h [t1, t2, t3] = (f (node t1), g (node t2), h (node t3))
344 | triple _ _ _ ts = raise XML_BODY ts;
346 fun list f ts = map (f o node) ts;
348 fun option _ [] = NONE
349 | option f [t] = SOME (f (node t))
350 | option _ ts = raise XML_BODY ts;
354 val (tag, (xs, ts)) = tagged t;
355 val f = nth fs tag handle General.Subscript => raise XML_BODY [t];
357 | variant _ ts = raise XML_BODY ts;