src/HOL/ex/Serbian.thy
changeset 30179 c703c9368c12
child 41266 5eb59b62e7de
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/HOL/ex/Serbian.thy	Sat Feb 28 21:34:33 2009 +0100
     1.3 @@ -0,0 +1,217 @@
     1.4 +(* -*- coding: utf-8 -*- :encoding=utf-8:  
     1.5 +    Author:     Filip Maric
     1.6 +
     1.7 +Example theory involving Unicode characters (UTF-8 encoding) -- 
     1.8 +Conversion between Serbian cyrillic and latin letters (српска ћирилица и латиница)
     1.9 +*)
    1.10 +
    1.11 +header {* A Serbian theory *}
    1.12 +
    1.13 +theory Serbian
    1.14 +imports Main
    1.15 +begin
    1.16 +
    1.17 +text{* Serbian cyrillic letters *}
    1.18 +datatype azbuka =
    1.19 +  azbA   ("А")
    1.20 +| azbB   ("Б")
    1.21 +| azbV   ("В")
    1.22 +| azbG   ("Г")
    1.23 +| azbD   ("Д")
    1.24 +| azbDj  ("Ђ")
    1.25 +| azbE   ("Е")
    1.26 +| azbZv  ("Ж")
    1.27 +| azbZ   ("З")
    1.28 +| azbI   ("И")
    1.29 +| azbJ   ("Ј")
    1.30 +| azbK   ("К")
    1.31 +| azbL   ("Л")
    1.32 +| azbLj  ("Љ")
    1.33 +| azbM   ("М")
    1.34 +| azbN   ("Н")
    1.35 +| azbNj  ("Њ")
    1.36 +| azbO   ("О")
    1.37 +| azbP   ("П")
    1.38 +| azbR   ("Р")
    1.39 +| azbS   ("С")
    1.40 +| azbT   ("Т")
    1.41 +| azbC'  ("Ћ")
    1.42 +| azbU   ("У")
    1.43 +| azbF   ("Ф")
    1.44 +| azbH   ("Х")
    1.45 +| azbC   ("Ц")
    1.46 +| azbCv  ("Ч")
    1.47 +| azbDzv ("Џ")
    1.48 +| azbSv  ("Ш")
    1.49 +| azbSpc
    1.50 +
    1.51 +thm azbuka.induct
    1.52 +
    1.53 +text{* Serbian latin letters *}
    1.54 +datatype abeceda =
    1.55 +  abcA   ("A")
    1.56 +| abcB   ("B")
    1.57 +| abcC   ("C")
    1.58 +| abcCv  ("Č")
    1.59 +| abcC'  ("Ć")
    1.60 +| abcD   ("D")
    1.61 +| abcE   ("E")
    1.62 +| abcF   ("F")
    1.63 +| abcG   ("G")
    1.64 +| abcH   ("H")
    1.65 +| abcI   ("I")
    1.66 +| abcJ   ("J")
    1.67 +| abcK   ("K")
    1.68 +| abcL   ("L")
    1.69 +| abcM   ("M")
    1.70 +| abcN   ("N")
    1.71 +| abcO   ("O")
    1.72 +| abcP   ("P")
    1.73 +| abcR   ("R")
    1.74 +| abcS   ("S")
    1.75 +| abcSv  ("Š")
    1.76 +| abcT   ("T")
    1.77 +| abcU   ("U")
    1.78 +| abcV   ("V")
    1.79 +| abcZ   ("Z")
    1.80 +| abcvZ  ("Ž")
    1.81 +| abcSpc
    1.82 +
    1.83 +thm abeceda.induct
    1.84 +
    1.85 +
    1.86 +text{* Conversion from cyrillic to latin - 
    1.87 +       this conversion is valid in all cases *}
    1.88 +primrec azb2abc_aux :: "azbuka \<Rightarrow> abeceda list"
    1.89 +where
    1.90 +  "azb2abc_aux А = [A]"
    1.91 +| "azb2abc_aux Б = [B]"
    1.92 +| "azb2abc_aux В = [V]"
    1.93 +| "azb2abc_aux Г = [G]"
    1.94 +| "azb2abc_aux Д = [D]"
    1.95 +| "azb2abc_aux Ђ = [D, J]"
    1.96 +| "azb2abc_aux Е = [E]"
    1.97 +| "azb2abc_aux Ж = [Ž]"
    1.98 +| "azb2abc_aux З = [Z]"
    1.99 +| "azb2abc_aux И = [I]"
   1.100 +| "azb2abc_aux Ј = [J]"
   1.101 +| "azb2abc_aux К = [K]"
   1.102 +| "azb2abc_aux Л = [L]"
   1.103 +| "azb2abc_aux Љ = [L, J]"
   1.104 +| "azb2abc_aux М = [M]"
   1.105 +| "azb2abc_aux Н = [N]"
   1.106 +| "azb2abc_aux Њ = [N, J]"
   1.107 +| "azb2abc_aux О = [O]"
   1.108 +| "azb2abc_aux П = [P]"
   1.109 +| "azb2abc_aux Р = [R]"
   1.110 +| "azb2abc_aux С = [S]"
   1.111 +| "azb2abc_aux Т = [T]"
   1.112 +| "azb2abc_aux Ћ = [Ć]"
   1.113 +| "azb2abc_aux У = [U]"
   1.114 +| "azb2abc_aux Ф = [F]"
   1.115 +| "azb2abc_aux Х = [H]"
   1.116 +| "azb2abc_aux Ц = [C]"
   1.117 +| "azb2abc_aux Ч = [Č]"
   1.118 +| "azb2abc_aux Џ = [D, Ž]"
   1.119 +| "azb2abc_aux Ш = [Š]"
   1.120 +| "azb2abc_aux azbSpc = [abcSpc]"
   1.121 +
   1.122 +primrec azb2abc :: "azbuka list \<Rightarrow> abeceda list"
   1.123 +where
   1.124 +  "azb2abc [] = []"
   1.125 +| "azb2abc (x # xs) = azb2abc_aux x @ azb2abc xs"
   1.126 +
   1.127 +value "azb2abc [Д, О, Б, А, Р, azbSpc, Д, А, Н, azbSpc, С, В, И, М, А]"
   1.128 +value "azb2abc [Љ, У, Б, И, Ч, И, Ц, А, azbSpc, Н, А, azbSpc, П, О, Љ, У]"
   1.129 +
   1.130 +text{* The conversion from latin to cyrillic - 
   1.131 +       this conversion is valid in most cases but there are some exceptions *}
   1.132 +primrec abc2azb_aux :: "abeceda \<Rightarrow> azbuka"
   1.133 +where
   1.134 +   "abc2azb_aux A = А"
   1.135 +|  "abc2azb_aux B = Б"
   1.136 +|  "abc2azb_aux C = Ц"
   1.137 +|  "abc2azb_aux Č = Ч"
   1.138 +|  "abc2azb_aux Ć = Ћ"
   1.139 +|  "abc2azb_aux D = Д"
   1.140 +|  "abc2azb_aux E = Е"
   1.141 +|  "abc2azb_aux F = Ф"
   1.142 +|  "abc2azb_aux G = Г"
   1.143 +|  "abc2azb_aux H = Х"
   1.144 +|  "abc2azb_aux I = И"
   1.145 +|  "abc2azb_aux J = Ј"
   1.146 +|  "abc2azb_aux K = К"
   1.147 +|  "abc2azb_aux L = Л"
   1.148 +|  "abc2azb_aux M = М"
   1.149 +|  "abc2azb_aux N = Н"
   1.150 +|  "abc2azb_aux O = О"
   1.151 +|  "abc2azb_aux P = П"
   1.152 +|  "abc2azb_aux R = Р"
   1.153 +|  "abc2azb_aux S = С"
   1.154 +|  "abc2azb_aux Š = Ш"
   1.155 +|  "abc2azb_aux T = Т"
   1.156 +|  "abc2azb_aux U = У"
   1.157 +|  "abc2azb_aux V = В"
   1.158 +|  "abc2azb_aux Z = З"
   1.159 +|  "abc2azb_aux Ž = Ж"
   1.160 +|  "abc2azb_aux abcSpc = azbSpc"
   1.161 +
   1.162 +fun abc2azb :: "abeceda list \<Rightarrow> azbuka list"
   1.163 +where
   1.164 +  "abc2azb [] = []"
   1.165 +| "abc2azb [x] = [abc2azb_aux x]"
   1.166 +| "abc2azb (x1 # x2 # xs) = 
   1.167 +       (if x1 = D \<and> x2 = J then
   1.168 +           Ђ # abc2azb xs
   1.169 +        else if x1 = L \<and> x2 = J then
   1.170 +           Љ # abc2azb xs
   1.171 +        else if x1 = N \<and> x2 = J then
   1.172 +           Њ # abc2azb xs
   1.173 +        else if x1 = D \<and> x2 = Ž then
   1.174 +           Џ # abc2azb xs
   1.175 +        else
   1.176 +           abc2azb_aux x1 # abc2azb (x2 # xs)
   1.177 +       )"
   1.178 +
   1.179 +value "abc2azb [D, O, B, A, R, abcSpc, D, A, N, abcSpc, S, V, I, M, A]"
   1.180 +value "abc2azb [L, J, U, B, I, Č, I, C, A, abcSpc, N, A, abcSpc, P, O, L, J, U]"
   1.181 +
   1.182 +text{* Here are some invalid conversions *}
   1.183 +lemma "abc2azb [N, A, D, Ž, I, V, E, T, I] = [Н, А, Џ, И, В, Е, Т, И]"
   1.184 +  by simp
   1.185 +text{* but it should be: НАДЖИВЕТИ *}
   1.186 +lemma "abc2azb [I, N, J, E, K, C, I, J, A] = [И, Њ, Е, К, Ц, И, Ј, А]"
   1.187 +  by simp
   1.188 +text{* but it should be: ИНЈЕКЦИЈА *}
   1.189 +
   1.190 +text{* The conversion fails for all cyrrilic words that contain НЈ ЛЈ ДЈ ДЖ *}
   1.191 +
   1.192 +
   1.193 +text{* Idempotency in one direction *}
   1.194 +lemma [simp]: "azb2abc_aux (abc2azb_aux x) = [x]"
   1.195 +  by (cases x) auto
   1.196 +
   1.197 +lemma [simp]: "abc2azb (Ž # xs) = Ж # abc2azb xs"
   1.198 +  by (cases xs) auto
   1.199 +
   1.200 +lemma [simp]: "abc2azb (J # xs) = Ј # abc2azb xs"
   1.201 +  by (cases xs) auto
   1.202 +
   1.203 +theorem "azb2abc (abc2azb x) = x"
   1.204 +proof (induct x)
   1.205 +  case (Cons x1 xs)
   1.206 +  thus ?case
   1.207 +  proof (cases xs)
   1.208 +    case (Cons x2 xss)
   1.209 +    thus ?thesis
   1.210 +      using `azb2abc (abc2azb xs) = xs`
   1.211 +      by auto
   1.212 +  qed simp
   1.213 +qed simp
   1.214 +
   1.215 +text{* Idempotency in the other direction does not hold *}
   1.216 +lemma "abc2azb (azb2abc [И, Н, Ј, Е, К, Ц, И, Ј, А]) \<noteq> [И, Н, Ј, Е, К, Ц, И, Ј, А]"
   1.217 +  by simp
   1.218 +text{* It fails for all cyrrilic words that contain НЈ ЛЈ ДЈ ДЖ *}
   1.219 +
   1.220 +end