1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/src/HOL/ex/Serbian.thy Sat Feb 28 21:34:33 2009 +0100
1.3 @@ -0,0 +1,217 @@
1.4 +(* -*- coding: utf-8 -*- :encoding=utf-8:
1.5 + Author: Filip Maric
1.6 +
1.7 +Example theory involving Unicode characters (UTF-8 encoding) --
1.8 +Conversion between Serbian cyrillic and latin letters (српска ћирилица и латиница)
1.9 +*)
1.10 +
1.11 +header {* A Serbian theory *}
1.12 +
1.13 +theory Serbian
1.14 +imports Main
1.15 +begin
1.16 +
1.17 +text{* Serbian cyrillic letters *}
1.18 +datatype azbuka =
1.19 + azbA ("А")
1.20 +| azbB ("Б")
1.21 +| azbV ("В")
1.22 +| azbG ("Г")
1.23 +| azbD ("Д")
1.24 +| azbDj ("Ђ")
1.25 +| azbE ("Е")
1.26 +| azbZv ("Ж")
1.27 +| azbZ ("З")
1.28 +| azbI ("И")
1.29 +| azbJ ("Ј")
1.30 +| azbK ("К")
1.31 +| azbL ("Л")
1.32 +| azbLj ("Љ")
1.33 +| azbM ("М")
1.34 +| azbN ("Н")
1.35 +| azbNj ("Њ")
1.36 +| azbO ("О")
1.37 +| azbP ("П")
1.38 +| azbR ("Р")
1.39 +| azbS ("С")
1.40 +| azbT ("Т")
1.41 +| azbC' ("Ћ")
1.42 +| azbU ("У")
1.43 +| azbF ("Ф")
1.44 +| azbH ("Х")
1.45 +| azbC ("Ц")
1.46 +| azbCv ("Ч")
1.47 +| azbDzv ("Џ")
1.48 +| azbSv ("Ш")
1.49 +| azbSpc
1.50 +
1.51 +thm azbuka.induct
1.52 +
1.53 +text{* Serbian latin letters *}
1.54 +datatype abeceda =
1.55 + abcA ("A")
1.56 +| abcB ("B")
1.57 +| abcC ("C")
1.58 +| abcCv ("Č")
1.59 +| abcC' ("Ć")
1.60 +| abcD ("D")
1.61 +| abcE ("E")
1.62 +| abcF ("F")
1.63 +| abcG ("G")
1.64 +| abcH ("H")
1.65 +| abcI ("I")
1.66 +| abcJ ("J")
1.67 +| abcK ("K")
1.68 +| abcL ("L")
1.69 +| abcM ("M")
1.70 +| abcN ("N")
1.71 +| abcO ("O")
1.72 +| abcP ("P")
1.73 +| abcR ("R")
1.74 +| abcS ("S")
1.75 +| abcSv ("Š")
1.76 +| abcT ("T")
1.77 +| abcU ("U")
1.78 +| abcV ("V")
1.79 +| abcZ ("Z")
1.80 +| abcvZ ("Ž")
1.81 +| abcSpc
1.82 +
1.83 +thm abeceda.induct
1.84 +
1.85 +
1.86 +text{* Conversion from cyrillic to latin -
1.87 + this conversion is valid in all cases *}
1.88 +primrec azb2abc_aux :: "azbuka \<Rightarrow> abeceda list"
1.89 +where
1.90 + "azb2abc_aux А = [A]"
1.91 +| "azb2abc_aux Б = [B]"
1.92 +| "azb2abc_aux В = [V]"
1.93 +| "azb2abc_aux Г = [G]"
1.94 +| "azb2abc_aux Д = [D]"
1.95 +| "azb2abc_aux Ђ = [D, J]"
1.96 +| "azb2abc_aux Е = [E]"
1.97 +| "azb2abc_aux Ж = [Ž]"
1.98 +| "azb2abc_aux З = [Z]"
1.99 +| "azb2abc_aux И = [I]"
1.100 +| "azb2abc_aux Ј = [J]"
1.101 +| "azb2abc_aux К = [K]"
1.102 +| "azb2abc_aux Л = [L]"
1.103 +| "azb2abc_aux Љ = [L, J]"
1.104 +| "azb2abc_aux М = [M]"
1.105 +| "azb2abc_aux Н = [N]"
1.106 +| "azb2abc_aux Њ = [N, J]"
1.107 +| "azb2abc_aux О = [O]"
1.108 +| "azb2abc_aux П = [P]"
1.109 +| "azb2abc_aux Р = [R]"
1.110 +| "azb2abc_aux С = [S]"
1.111 +| "azb2abc_aux Т = [T]"
1.112 +| "azb2abc_aux Ћ = [Ć]"
1.113 +| "azb2abc_aux У = [U]"
1.114 +| "azb2abc_aux Ф = [F]"
1.115 +| "azb2abc_aux Х = [H]"
1.116 +| "azb2abc_aux Ц = [C]"
1.117 +| "azb2abc_aux Ч = [Č]"
1.118 +| "azb2abc_aux Џ = [D, Ž]"
1.119 +| "azb2abc_aux Ш = [Š]"
1.120 +| "azb2abc_aux azbSpc = [abcSpc]"
1.121 +
1.122 +primrec azb2abc :: "azbuka list \<Rightarrow> abeceda list"
1.123 +where
1.124 + "azb2abc [] = []"
1.125 +| "azb2abc (x # xs) = azb2abc_aux x @ azb2abc xs"
1.126 +
1.127 +value "azb2abc [Д, О, Б, А, Р, azbSpc, Д, А, Н, azbSpc, С, В, И, М, А]"
1.128 +value "azb2abc [Љ, У, Б, И, Ч, И, Ц, А, azbSpc, Н, А, azbSpc, П, О, Љ, У]"
1.129 +
1.130 +text{* The conversion from latin to cyrillic -
1.131 + this conversion is valid in most cases but there are some exceptions *}
1.132 +primrec abc2azb_aux :: "abeceda \<Rightarrow> azbuka"
1.133 +where
1.134 + "abc2azb_aux A = А"
1.135 +| "abc2azb_aux B = Б"
1.136 +| "abc2azb_aux C = Ц"
1.137 +| "abc2azb_aux Č = Ч"
1.138 +| "abc2azb_aux Ć = Ћ"
1.139 +| "abc2azb_aux D = Д"
1.140 +| "abc2azb_aux E = Е"
1.141 +| "abc2azb_aux F = Ф"
1.142 +| "abc2azb_aux G = Г"
1.143 +| "abc2azb_aux H = Х"
1.144 +| "abc2azb_aux I = И"
1.145 +| "abc2azb_aux J = Ј"
1.146 +| "abc2azb_aux K = К"
1.147 +| "abc2azb_aux L = Л"
1.148 +| "abc2azb_aux M = М"
1.149 +| "abc2azb_aux N = Н"
1.150 +| "abc2azb_aux O = О"
1.151 +| "abc2azb_aux P = П"
1.152 +| "abc2azb_aux R = Р"
1.153 +| "abc2azb_aux S = С"
1.154 +| "abc2azb_aux Š = Ш"
1.155 +| "abc2azb_aux T = Т"
1.156 +| "abc2azb_aux U = У"
1.157 +| "abc2azb_aux V = В"
1.158 +| "abc2azb_aux Z = З"
1.159 +| "abc2azb_aux Ž = Ж"
1.160 +| "abc2azb_aux abcSpc = azbSpc"
1.161 +
1.162 +fun abc2azb :: "abeceda list \<Rightarrow> azbuka list"
1.163 +where
1.164 + "abc2azb [] = []"
1.165 +| "abc2azb [x] = [abc2azb_aux x]"
1.166 +| "abc2azb (x1 # x2 # xs) =
1.167 + (if x1 = D \<and> x2 = J then
1.168 + Ђ # abc2azb xs
1.169 + else if x1 = L \<and> x2 = J then
1.170 + Љ # abc2azb xs
1.171 + else if x1 = N \<and> x2 = J then
1.172 + Њ # abc2azb xs
1.173 + else if x1 = D \<and> x2 = Ž then
1.174 + Џ # abc2azb xs
1.175 + else
1.176 + abc2azb_aux x1 # abc2azb (x2 # xs)
1.177 + )"
1.178 +
1.179 +value "abc2azb [D, O, B, A, R, abcSpc, D, A, N, abcSpc, S, V, I, M, A]"
1.180 +value "abc2azb [L, J, U, B, I, Č, I, C, A, abcSpc, N, A, abcSpc, P, O, L, J, U]"
1.181 +
1.182 +text{* Here are some invalid conversions *}
1.183 +lemma "abc2azb [N, A, D, Ž, I, V, E, T, I] = [Н, А, Џ, И, В, Е, Т, И]"
1.184 + by simp
1.185 +text{* but it should be: НАДЖИВЕТИ *}
1.186 +lemma "abc2azb [I, N, J, E, K, C, I, J, A] = [И, Њ, Е, К, Ц, И, Ј, А]"
1.187 + by simp
1.188 +text{* but it should be: ИНЈЕКЦИЈА *}
1.189 +
1.190 +text{* The conversion fails for all cyrrilic words that contain НЈ ЛЈ ДЈ ДЖ *}
1.191 +
1.192 +
1.193 +text{* Idempotency in one direction *}
1.194 +lemma [simp]: "azb2abc_aux (abc2azb_aux x) = [x]"
1.195 + by (cases x) auto
1.196 +
1.197 +lemma [simp]: "abc2azb (Ž # xs) = Ж # abc2azb xs"
1.198 + by (cases xs) auto
1.199 +
1.200 +lemma [simp]: "abc2azb (J # xs) = Ј # abc2azb xs"
1.201 + by (cases xs) auto
1.202 +
1.203 +theorem "azb2abc (abc2azb x) = x"
1.204 +proof (induct x)
1.205 + case (Cons x1 xs)
1.206 + thus ?case
1.207 + proof (cases xs)
1.208 + case (Cons x2 xss)
1.209 + thus ?thesis
1.210 + using `azb2abc (abc2azb xs) = xs`
1.211 + by auto
1.212 + qed simp
1.213 +qed simp
1.214 +
1.215 +text{* Idempotency in the other direction does not hold *}
1.216 +lemma "abc2azb (azb2abc [И, Н, Ј, Е, К, Ц, И, Ј, А]) \<noteq> [И, Н, Ј, Е, К, Ц, И, Ј, А]"
1.217 + by simp
1.218 +text{* It fails for all cyrrilic words that contain НЈ ЛЈ ДЈ ДЖ *}
1.219 +
1.220 +end