wenzelm@12416
|
1 |
(* Title: Pure/General/xml.ML
|
wenzelm@12416
|
2 |
ID: $Id$
|
berghofe@13729
|
3 |
Author: Markus Wenzel, LMU Muenchen
|
berghofe@13729
|
4 |
Stefan Berghofer, TU Muenchen
|
wenzelm@12416
|
5 |
License: GPL (GNU GENERAL PUBLIC LICENSE)
|
wenzelm@12416
|
6 |
|
berghofe@13729
|
7 |
Basic support for XML input and output.
|
wenzelm@12416
|
8 |
*)
|
wenzelm@12416
|
9 |
|
wenzelm@12416
|
10 |
signature XML =
sig
  (*document tree: elements carry a name, attribute pairs and children;
    Text carries character data*)
  datatype tree =
      Elem of string * (string * string) list * tree list
    | Text of string

  (*output*)
  val element: string -> (string * string) list -> string list -> string
  val text: string -> string
  val header: string
  val string_of_tree: tree -> string

  (*input*)
  val tree_of_string: string -> tree
  val parse_content: string list -> tree list * string list
  val parse_elem: string list -> tree * string list
  val parse_document: string list -> (string option * tree) * string list
end;
|
wenzelm@12416
|
24 |
|
wenzelm@12416
|
25 |
structure XML: XML =
|
wenzelm@12416
|
26 |
struct
|
wenzelm@12416
|
27 |
|
wenzelm@12416
|
28 |
(* character data *)
|
wenzelm@12416
|
29 |
|
wenzelm@12416
|
30 |
(*Escape a single-character string for XML output: the five characters
  that have predefined XML entities are replaced by the corresponding
  entity reference; any other argument is returned unchanged.*)
fun encode "<" = "&lt;"
  | encode ">" = "&gt;"
  | encode "&" = "&amp;"
  | encode "'" = "&apos;"
  | encode "\"" = "&quot;"
  | encode c = c;
|
wenzelm@12416
|
36 |
|
berghofe@13729
|
37 |
(*Inverse of encode: map one of the five predefined XML entity references
  (in the form "&name;" as assembled by scan_special) back to the character
  it stands for; any other argument is returned unchanged.*)
fun decode "&lt;" = "<"
  | decode "&gt;" = ">"
  | decode "&amp;" = "&"
  | decode "&apos;" = "'"
  | decode "&quot;" = "\""
  | decode c = c;
|
berghofe@13729
|
43 |
|
berghofe@14596
|
44 |
(*Escape a whole string by applying encode to each of its characters.*)
fun text str = String.translate (fn ch => encode (String.str ch)) str;
|
wenzelm@12416
|
45 |
|
wenzelm@12416
|
46 |
|
wenzelm@12416
|
47 |
(* elements *)
|
wenzelm@12416
|
48 |
|
berghofe@13729
|
49 |
(*XML document tree.
  Elem (name, attributes, children): an element node;
  Text s: a character data node.*)
datatype tree =
    Elem of string * (string * string) list * tree list
  | Text of string;
|
berghofe@13729
|
52 |
|
berghofe@14596
|
53 |
(*Render one attribute as: name = "value", with the value escaped via text.*)
fun attribute (name, value) =
  let val quoted = Library.quote (text value)
  in name ^ " = " ^ quoted end;
|
wenzelm@12416
|
54 |
|
wenzelm@12416
|
55 |
(*Render an element from its name, attribute pairs and already rendered
  children.  An empty child list yields the self-closing form <name ... />;
  otherwise the children are concatenated between opening and closing tag.*)
fun element name atts cs =
  let
    val tag = space_implode " " (name :: map attribute atts)
  in
    (case cs of
      [] => enclose "<" "/>" tag
    | _ => enclose "<" ">" tag ^ implode cs ^ enclose "</" ">" name)
  end;
|
wenzelm@12416
|
60 |
|
berghofe@13729
|
61 |
(*Render a tree as a string.  Elements are rendered recursively through
  element; the content of a Text node is emitted verbatim.
  NOTE(review): Text content is not passed through text/encode here, whereas
  the parser decodes entity references into Text nodes -- confirm whether
  callers are expected to supply pre-escaped text.*)
fun string_of_tree tree =
  (case tree of
    Elem (name, atts, children) =>
      element name atts (map string_of_tree children)
  | Text content => content);
|
berghofe@13729
|
64 |
|
wenzelm@12416
|
65 |
(*XML declaration line, terminated by a newline.*)
val header = "<?xml version=\"1.0\"?>\n";
|
wenzelm@12416
|
66 |
|
berghofe@13729
|
67 |
|
berghofe@13729
|
68 |
(* parser *)
|
berghofe@13729
|
69 |
|
berghofe@14185
|
70 |
(*Build a parse error message from a description and the scanner state:
  the message is followed by (at most) the first 100 symbols of the
  remaining input.  The second component of the state is ignored.*)
fun err msg (input, _) =
  let val found = implode (take (100, input))
  in "XML parsing error: " ^ msg ^ "\nfound:\n" ^ found end;
|
berghofe@13729
|
72 |
|
berghofe@14185
|
73 |
(*Scan a possibly empty run of XML white space.  Besides space and newline,
  tab and carriage return are accepted as well, as in the XML grammar
  (S ::= (#x20 | #x9 | #xD | #xA)+).*)
val scan_whspc = Scan.repeat ($$ " " || $$ "\t" || $$ "\r" || $$ "\n");
|
berghofe@13729
|
74 |
|
berghofe@14185
|
75 |
(*Scanner for one fixed keyword: the string is exploded and used as the
  single entry of a lexicon for Scan.literal.*)
fun literal str = Scan.literal (Scan.make_lexicon [explode str]);
|
berghofe@13729
|
76 |
|
berghofe@13729
|
77 |
(*Scan an entity reference of the form &ident; and pass the whole
  reference through decode.*)
val scan_special =
  ($$ "&" ^^ Symbol.scan_id ^^ $$ ";") >> (fn entity => decode entity);
|
berghofe@13729
|
78 |
|
berghofe@14185
|
79 |
(*Scan a non-empty run of character data.  Scanning stops where the rest
  of the input is white space followed by "<"; entity references are
  decoded via scan_special, every other symbol is taken as is.*)
val parse_chars =
  let
    val before_tag = scan_whspc -- $$ "<";
    val char = scan_special || Scan.one Symbol.not_eof;
  in
    Scan.repeat1 (Scan.unless before_tag char) >> implode
  end;
|
berghofe@13729
|
81 |
|
berghofe@14185
|
82 |
(*Scan a CDATA section and return its raw content, i.e. everything between
  "<![CDATA[" and the first "]]>", without the delimiters.*)
val parse_cdata =
  let
    val close = literal "]]>";
    val content = Scan.repeat (Scan.unless close (Scan.one Symbol.not_eof)) >> implode;
  in
    literal "<![CDATA[" |-- content --| close
  end;
|
berghofe@13729
|
85 |
|
berghofe@14185
|
86 |
(*Scan one attribute  name = value  and return the pair (name, value).
  White space around "=" is optional.  The value may be delimited by double
  or by single quotes (both forms are allowed by the XML AttValue
  production); it extends to the next occurrence of the same quote
  character.  Entity references inside the value are decoded via
  scan_special.*)
val parse_att =
  Symbol.scan_id --| scan_whspc --| $$ "=" --| scan_whspc --
  (($$ "\"" || $$ "'") :-- (fn q =>
      (Scan.repeat (Scan.unless ($$ q)
        (scan_special || Scan.one Symbol.not_eof)) >> implode) --| $$ q)
    (*drop the opening quote, keep only the value*)
    >> (fn (_, value) => value));
|
berghofe@14185
|
90 |
|
berghofe@14185
|
91 |
(*Scan a comment: "<!--", then all symbols up to the first "-->", then the
  terminator itself.  All three parts are kept in the (nested pair) result.*)
val parse_comment =
  let
    val close = literal "-->";
    val body = Scan.repeat (Scan.unless close (Scan.one Symbol.not_eof));
  in
    literal "<!--" -- body -- close
  end;
|
berghofe@14185
|
94 |
|
berghofe@14185
|
95 |
(*Scan a processing instruction "<?" ... "?>" and return the list of
  symbols between the delimiters.*)
val parse_pi =
  let
    val close = literal "?>";
    val body = Scan.repeat (Scan.unless close (Scan.one Symbol.not_eof));
  in
    literal "<?" |-- body --| close
  end;
|
berghofe@14185
|
98 |
|
berghofe@14185
|
99 |
(*Scan the content of an element: optional leading character data, then
  any number of nodes (element, CDATA section, processing instruction or
  comment), each optionally followed by character data; trailing white
  space is skipped.  Processing instructions and comments contribute no
  tree; CDATA and character data become Text nodes.*)
fun parse_content input =
  let
    val chars =
      Scan.optional (scan_whspc |-- parse_chars >> (single o Text)) [];
    val node =
      parse_elem >> single ||
      parse_cdata >> (single o Text) ||
      parse_pi >> K [] ||
      parse_comment >> K [];
    val rest =
      Scan.repeat ((scan_whspc |-- node) -- chars >> op @) >> flat;
  in
    ((chars -- rest >> op @) --| scan_whspc) input
  end

(*Scan one element: "<" name attributes, then either "/>" (no children) or
  ">" content "</" name ">".  Once the opening tag has been read, failure
  to find the tag end or the matching closing tag is turned into an error
  message via !! and err.*)
and parse_elem input =
  let
    val open_tag =
      $$ "<" |-- Symbol.scan_id --
        Scan.repeat (scan_whspc |-- parse_att) --| scan_whspc;

    (*remainder of the element, depending on the name just scanned*)
    fun body (name, _) =
      !! (err "Expected > or />")
        (literal "/>" >> K [] ||
         $$ ">" |-- parse_content --|
           !! (err ("Expected </" ^ name ^ ">"))
             (literal ("</" ^ name) --| scan_whspc --| $$ ">"));
  in
    ((open_tag :-- body) >>
      (fn ((name, atts), children) => Elem (name, atts, children))) input
  end;
|
berghofe@13729
|
118 |
|
berghofe@14185
|
119 |
(*Scan a document: an optional "<!DOCTYPE ...>" declaration, whose text up
  to the first ">" is returned as a string, followed by the root element.*)
val parse_document =
  let
    val doctype_text =
      Scan.repeat (Scan.unless ($$ ">") (Scan.one Symbol.not_eof)) >> implode;
    val doctype =
      literal "<!DOCTYPE" -- scan_whspc |-- doctype_text --| $$ ">" --| scan_whspc;
  in
    Scan.option doctype -- parse_elem
  end;
|
berghofe@14185
|
124 |
|
berghofe@13729
|
125 |
(*Parse a string containing exactly one element (surrounding white space
  is allowed) into a tree.  Scanner failures are reported through
  Scan.error with the "Malformed element" message; input left over after
  the element raises an error showing (at most) its first 100 symbols.*)
fun tree_of_string s =
  let
    val scan_root =
      Scan.error (!! (err "Malformed element")
        (scan_whspc |-- parse_elem --| scan_whspc));
  in
    (case Scan.finite Symbol.stopper scan_root (Symbol.explode s) of
      (tree, []) => tree
    | (_, rest) =>
        error ("XML parsing error: Unprocessed input\n" ^
          implode (take (100, rest))))
  end;
|
berghofe@13729
|
131 |
|
wenzelm@12416
|
132 |
end;
|