hu_Hung.sor 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180
  1. # Old Hungarian script (ISO 15924 code: Hung)
  2. # Transliterate numbers and words
  3. # convert words whose traditional or foreign "y" stands for "i" (so it must not be read as part of gy, ly, ny, ty)
  4. # e.g. Áprily, Champs-Élysées, Élysée-palota, Dolly, Folly, Hollywood, jolly...
  5. "^(Áp​?ri​?l|Champs-Él|[cC]i​?t|Do​?lák-Sa​?l|[dfhjDFHJ]ol​?l|Él|Fesz​?t|[gG]rizz​?l|Ha​?rasz​?t|Hat​?va​?n|Husz​?t|[iI]n​?ter​?ci​?t|Kéth​?l|Ku​?ko​?r​?el​?l|Mind​?szen​?t|Nosz​?t|[pP]en​?n|Pes​?t|Re​?gu​?l|So​?n|Szi​?l|Szte​?va​?no​?vi​?t|Thö​?kö​?l|Vö​?rös​?mar​?t|[zZ][lł]ot)y(.*) 0$" $1𐳐$2 # y -> small i sign; every ZWSP in the stem is optional
  6. "^(ÁP​?RI​?L|CHAMPS-ÉL|CI​?T|DO​?LÁK-SA​?L|[DFHJ]OL​?L|ÉL|FESZ​?T|GRIZZ​?L|HA​?RASZ​?T|HAT​?VA​?N|HUSZ​?T|IN​?TER​?CI​?T|KÉTH​?L|KU​?KO​?R​?EL​?L|MIND​?SZEN​?T|NOSZ​?T|PEN​?N|PES​?T|RE​?GU​?L|SO​?N|SZI​?L|SZTE​?VA​?NO​?VI​?T|THÖ​?KÖ​?L|VÖ​?RÖS​?MAR​?T|Z[LŁ]OT)Y(.*) 0$" $1𐲐$2 # all-caps variant: Y -> capital I sign
  7. # if the original word contains a character that no rule below handles, return it without modification (í/Í and ä/Ä are known: the letter rules transliterate them)
  8. "^(.*[^-0-9a-zA-ZáäéëíóöőúüűÁÄÉËÍÓÖŐÚÜŰ​–,„”\?\;].*) 0$" \1
  9. # words with y (the optional ZWSP marks may be present or absent in the input word)
  10. "^y(ard.*) 0$" 𐳒$1 # initial y -> small j sign
  11. "^Y([aA][rR][dD].*|[uU]​?[cC][oO][nN].*) 0$" 𐲒$1 # initial Y -> capital J sign
  12. "^Y([bB][lL].*) 0$" 𐲑$1 # initial Y -> capital Í sign
  13. "^Y(vet​?te.*) 0$" 𐲐$1 # initial Y -> capital I sign
  14. "^([bB]o|[cC]ow​?bo|[dD]isp​?la|[gG]ra|[pP]la)y(.*) 0$" $1𐳒$2 # y after these stems -> small j sign
  15. "^(BO|COW​?BO|DISP​?LA|GRA|PLA)Y(.*) 0$" $1𐲒$2 # all-caps variant -> capital J sign
  16. # don't transliterate other words starting with y
  17. "(^[yY].*) 0$" \1
  18. # don't transliterate words containing q that is followed by a character other than u/U (only qu has a letter rule)
  19. "(^.*[qQ][^uU].*) 0$" \1
  20. # avoid exceeding the recursion depth:
  21. # convert by 200-character parts (the first 200 characters and the remainder are converted separately)
  22. (.{200})(.+) $1$2
  23. # numbers
  24. # remove space separated zero (in LibreOffice integration)
  25. "(\d+) 0" $1
  26. "0: (.*) (.*)" # helper rules, input form: digit, colon, unit sign, five sign; digit 0 gives empty output
  27. "1: (.*) (.*)" \1 # digits 1-4: the unit sign repeated digit times
  28. "2: (.*) (.*)" \1\1
  29. "3: (.*) (.*)" \1\1\1
  30. "4: (.*) (.*)" \1\1\1\1
  31. "5: (.*) (.*)" \2 # digits 5-9: the five sign followed by the unit sign repeated (digit - 5) times
  32. "6: (.*) (.*)" \2\1
  33. "7: (.*) (.*)" \2\1\1
  34. "8: (.*) (.*)" \2\1\1\1
  35. "9: (.*) (.*)" \2\1\1\1\1
  36. (\d) $(\1: 𐳺 𐳻) # units: helper call with the signs for 1 and 5
  37. (\d)(\d) $(\1: 𐳼 𐳽)$2 # tens: helper call with the signs for 10 and 50, then the units
  # a zero hundreds digit contributes nothing: skip it instead of emitting a bare hundred sign (e.g. the 005 of 2005)
  0(\d\d) $1
  38. 1(\d\d) 𐳾$1 # 100-199: hundred sign without multiplier, then the rest (larger value first, as in the rules below)
  39. (\d)(\d\d) $1𐳾$2 # multiplier, hundred sign, rest
  # an all-zero thousands group contributes nothing: skip it instead of emitting a bare thousand sign
  000(\d\d\d) $1
  40. 1(\d\d\d)$ 𐳿$1 # 1000-1999: thousand sign without multiplier, then the rest
  41. (\d{1,3})(\d\d\d) $1𐳿$2 # multiplier, thousand sign, rest
  # an all-zero millions group contributes nothing either
  000(\d{6}) $1
  42. 1(\d{6})$ 𐳿𐳿$1 # million is written as two thousand signs
  43. (\d{1,3})(\d{6}) $1𐳿𐳿$2
  44. 1(\d{9})$ 𐳿𐳿𐳿$1 # 10^9 is written as three thousand signs
  45. (\d{1,3})(\d{9}) $1𐳿𐳿𐳿$2
  46. # numbers with letters, for example dates with affixes: the leading digits and the non-space remainder are converted separately
  47. "(\d+)([^ ]+)" $1$2
  48. # letters: each rule converts the leading letter (or digraph) and passes the rest of the word on through $1; longer matches are listed before shorter ones
  49. "^(.*) 0$" $1 # strip the space separated zero marker from words that reached this point
  50. a(.*) 𐳀$1
  51. A(.*) 𐲀$1
  52. á(.*) 𐳁$1
  53. Á(.*) 𐲁$1
  54. b(.*) 𐳂$1
  55. B(.*) 𐲂$1
  56. ccs(.*) 𐳆𐳆$1 # long cs: the cs sign twice
  57. CCS(.*) 𐲆𐲆$1
  58. cs(.*) 𐳆$1
  59. C[sS](.*) 𐲆$1 # Cs or CS
  60. c(.*) 𐳄$1
  61. C(.*) 𐲄$1
  62. d(.*) 𐳇$1
  63. D(.*) 𐲇$1
  64. e(.*) 𐳉$1
  65. E(.*) 𐲉$1
  66. é(.*) 𐳋$1
  67. É(.*) 𐲋$1
  68. ä(.*) 𐳋$1 # ä gets the same sign as é
  69. Ä(.*) 𐲋$1
  70. ë(.*) 𐳊$1
  71. Ë(.*) 𐲊$1
  72. f(.*) 𐳌$1
  73. F(.*) 𐲌$1
  74. ggy(.*) 𐳎𐳎$1 # long gy: the gy sign twice
  75. GGY(.*) 𐲎𐲎$1
  76. gy(.*) 𐳎$1
  77. G[yY](.*) 𐲎$1 # Gy or GY
  78. g(.*) 𐳍$1
  79. G(.*) 𐲍$1
  80. h(.*) 𐳏$1
  81. H(.*) 𐲏$1
  82. i(.*) 𐳐$1
  83. I(.*) 𐲐$1
  84. í(.*) 𐳑$1
  85. Í(.*) 𐲑$1
  86. j(.*) 𐳒$1
  87. J(.*) 𐲒$1
  88. k(.*) 𐳓$1
  89. K(.*) 𐲓$1
  90. lly(.*) 𐳗𐳗$1 # long ly: the ly sign twice
  91. LLY(.*) 𐲗𐲗$1
  92. ly(.*) 𐳗$1
  93. L[yY](.*) 𐲗$1 # Ly or LY
  94. l(.*) 𐳖$1
  95. L(.*) 𐲖$1
  96. m(.*) 𐳘$1
  97. M(.*) 𐲘$1
  98. nny(.*) 𐳚𐳚$1 # long ny: the ny sign twice
  99. NNY(.*) 𐲚𐲚$1
  100. ny(.*) 𐳚$1
  101. N[yY](.*) 𐲚$1 # Ny or NY (same shape as G[yY], L[yY], T[yY]); Nn is handled by the single-letter rules
  102. n(.*) 𐳙$1
  103. N(.*) 𐲙$1
  104. o(.*) 𐳛$1
  105. O(.*) 𐲛$1
  106. ó(.*) 𐳜$1
  107. Ó(.*) 𐲜$1
  108. ö(.*) 𐳞$1
  109. Ö(.*) 𐲞$1
  110. ő(.*) 𐳟$1
  111. Ő(.*) 𐲟$1
  112. p(.*) 𐳠$1
  113. P(.*) 𐲠$1
  114. qu(.*) 𐳓𐳮$1 # qu->kv
  115. Qu(.*) 𐲓𐳮$1 # Qu->Kv
  116. QU(.*) 𐲓𐲮$1 # QU->KV
  117. r(.*) 𐳢$1
  118. R(.*) 𐲢$1
  119. ssz(.*) 𐳥𐳥$1 # long sz: the sz sign twice
  120. SSZ(.*) 𐲥𐲥$1
  121. sz(.*) 𐳥$1
  122. S[zZ](.*) 𐲥$1 # Sz or SZ
  123. sch(.*) 𐳤$1 # sch is written with the s sign
  124. Sch(.*) 𐲤$1
  125. s(.*) 𐳤$1
  126. S(.*) 𐲤$1
  127. tty(.*) 𐳨𐳨$1 # long ty: the ty sign twice
  128. TTY(.*) 𐲨𐲨$1
  129. ty(.*) 𐳨$1
  130. T[yY](.*) 𐲨$1 # Ty or TY
  131. t(.*) 𐳦$1
  132. T(.*) 𐲦$1
  133. u(.*) 𐳪$1
  134. U(.*) 𐲪$1
  135. ú(.*) 𐳫$1
  136. Ú(.*) 𐲫$1
  137. ü(.*) 𐳭$1
  138. Ü(.*) 𐲭$1
  139. ű(.*) 𐳬$1
  140. Ű(.*) 𐲬$1
  141. [vw](.*) 𐳮$1 # w is written with the v sign
  142. [VW](.*) 𐲮$1
  143. x(.*) 𐳓𐳥$1 # x->ksz
  144. ^X 𐲓𐳥 # X->Ksz; the pattern has no capture group, so nothing is left to pass on
  145. X 𐲓𐲥 # X->KSZ; the pattern has no capture group, so nothing is left to pass on
  146. X([A-ZÁÉËÍÓÖŐÚÜŰ].*) 𐲓𐲥$1 # X->KSZ
  147. X(.*) 𐲓𐳥$1 # X->Ksz
  148. y(.*) 𐳐$1 # .+y->i
  149. Y(.*) 𐲐$1 # .+Y->I
  150. zzs(.*) 𐳰𐳰$1 # long zs: the zs sign twice
  151. ZZS(.*) 𐲰𐲰$1
  152. zs(.*) 𐳰$1
  153. Z[sS](.*) 𐲰$1 # Zs or ZS
  154. z(.*) 𐳯$1
  155. Z(.*) 𐲯$1
  156. # remove ZWSP (used for consonant disambiguation): drop the leading ZWSP and continue with the rest of the word
  157. ​(.*) $1
  158. # punctuation: each mark is replaced by the reversed form given in the rule, then the rest of the text is converted
  159. ”(.*) ‟$1
  160. \;(.*) ⁏$1
  161. \?(.*) ⸮$1
  162. ,(.*) ⹁$1
  163. „(.*) ⹂$1
  164. # don't modify unknown characters: keep the leading character as it is and convert the rest
  165. (.)(.*) \1$2
  166. (.*) \1 # last resort: return whatever is left (including empty input) unchanged