fpdb.conf 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180
  1. # This file has been modified (for OOo, by Jocelyn MERAND joc.mer@gmail.com) to
  2. # include country and encoding
  3. #
  4. # TO-DO: convert to BCP-47
  5. #
  6. # Guess strings are formed as follows: language-country-encoding (country may be empty, e.g. en--utf8)
  7. #
  8. # Based on a sample config file for the language models provided with Gertjan
  9. # van Noord's language guesser (http://odur.let.rug.nl/~vannoord/TextCat/)
  10. #
  11. # Notes:
  12. # - Putting the most probable languages at the top of the list
  13. # improves performance, because this will raise the threshold for
  14. # likely candidates more quickly.
  15. #
  16. # Top 10 languages by size, listed first (see http://www.ethnologue.com/ethno_docs/distribution.asp?by=size)
  17. zh-Hans.lm zh-CN-utf8 #zh-Hans
  18. es.lm es--utf8
  19. en.lm en--utf8
  20. ar.lm ar--utf8
  21. hi.lm hi--utf8
  22. bn.lm bn--utf8
  23. pt.lm pt--utf8
  24. ru.lm ru--utf8
  25. ja.lm ja--utf8
  26. de.lm de--utf8
  27. ab.lm ab--utf8
  28. ace.lm ace--utf8
  29. ada.lm ada--utf8
  30. af.lm af--utf8
  31. ak.lm ak--utf8
  32. alt.lm alt--utf8
  33. am.lm am--utf8
  34. arn.lm arn--utf8
  35. ast.lm ast--utf8
  36. az.lm az--utf8 #az-Latn
  37. az-Cyrl.lm az-cyrillic-utf8
  38. ay.lm ay--utf8
  39. ban.lm ban--utf8
  40. be.lm be--utf8
  41. bem.lm bem--utf8
  42. bg.lm bg--utf8
  43. bho.lm bho--utf8
  44. bi.lm bi--utf8
  45. bik.lm bik--utf8
  46. bm.lm bm--utf8
  47. bo.lm bo--utf8
  48. br.lm br--utf8
  49. bs.lm bs--utf8 #Suppress-Script: Latn
  50. ca.lm ca--utf8
  51. ckb.lm ckb--utf8
  52. cs.lm cs--utf8
  53. cy.lm cy--utf8
  54. da.lm da--utf8
  55. dv.lm dv--utf8
  56. ee.lm ee--utf8
  57. el.lm el--utf8
  58. emk-Latn.lm emk-Latn-utf8
  59. eo.lm eo--utf8
  60. et.lm et--utf8
  61. eu.lm eu--utf8
  62. fa.lm fa--utf8
  63. fi.lm fi--utf8
  64. fj.lm fj--utf8
  65. fo.lm fo--utf8
  66. fr.lm fr--utf8
  67. fur.lm fur--utf8
  68. fy.lm fy--utf8
  69. ga.lm ga--utf8
  70. gd.lm gd--utf8
  71. gl.lm gl--utf8
  72. grc.lm grc--utf8
  73. gu.lm gu--utf8
  74. gug.lm gug--utf8
  75. gv.lm gv--utf8
  76. ha-NG.lm ha-NG-utf8
  77. haw.lm haw--utf8
  78. he.lm he--utf8
  79. hil.lm hil--utf8
  80. hr.lm hr--utf8 #Suppress-Script: Latn
  81. hsb.lm hsb--utf8
  82. ht.lm ht--utf8
  83. hu.lm hu--utf8
  84. hy.lm hy--utf8
  85. ia.lm ia--utf8
  86. id.lm id--utf8
  87. is.lm is--utf8
  88. it.lm it--utf8
  89. ka.lm ka--utf8
  90. kk.lm kk--utf8
  91. kl.lm kl--utf8
  92. km.lm km--utf8
  93. kn.lm kn--utf8
  94. kng.lm kng--utf8
  95. ko.lm ko--utf8
  96. ktu.lm ktu--utf8
  97. ky.lm ky--utf8
  98. la.lm la--utf8
  99. lb.lm lb--utf8
  100. lg.lm lg--utf8
  101. ln.lm ln--utf8
  102. lo.lm lo--utf8
  103. lt.lm lt--utf8
  104. lv.lm lv--utf8
  105. mai.lm mai--utf8
  106. mi.lm mi--utf8
  107. mk.lm mk--utf8
  108. ml.lm ml--utf8
  109. mn.lm mn--utf8 #mn-Cyrl
  110. mos.lm mos--utf8
  111. mr.lm mr--utf8
  112. ms.lm ms--utf8 #ms-Latn
  113. mt.lm mt--utf8
  114. my.lm my--utf8
  115. nb.lm nb--utf8
  116. nds.lm nds--utf8
  117. ne.lm ne--utf8
  118. nl.lm nl--utf8
  119. nn.lm nn--utf8
  120. nr.lm nr--utf8
  121. nso.lm nso--utf8
  122. ny.lm ny--utf8
  123. oc.lm oc--utf8
  124. om.lm om--utf8
  125. pa.lm pa--utf8
  126. pl.lm pl--utf8
  127. plt.lm plt--utf8
  128. quz.lm quz--utf8
  129. qxa.lm qxa--utf8
  130. rm.lm rm--utf8
  131. ro.lm ro--utf8
  132. rue.lm rue--utf8
  133. rw.lm rw--utf8
  134. sa.lm sa--utf8
  135. sc.lm sc--utf8
  136. sco.lm sco--utf8
  137. sd.lm sd--utf8 #sd-Arab
  138. se.lm se--utf8
  139. sg.lm sg--utf8
  140. shs.lm shs--utf8
  141. si.lm si--utf8
  142. sk.lm sk--utf8
  143. sl.lm sl--utf8
  144. so.lm so--utf8
  145. sq.lm sq--utf8
  146. sr-Cyrl.lm sr--utf8 #sr-Cyrl
  147. sr-Latn.lm sh--utf8 #sr-Latn
  148. ss.lm ss--utf8
  149. st.lm st--utf8
  150. sv.lm sv--utf8
  151. sw.lm sw--utf8
  152. ta.lm ta--utf8
  153. tet.lm tet--utf8
  154. tg.lm tg--utf8
  155. th.lm th--utf8
  156. ti.lm ti--utf8
  157. tk.lm tk--utf8 #tk-Latn
  158. tl.lm tl--utf8
  159. tn.lm tn--utf8
  160. tpi.lm tpi--utf8
  161. tr.lm tr--utf8
  162. ts.lm ts--utf8
  163. tt.lm tt--utf8
  164. ty.lm ty--utf8
  165. tzm-Latn.lm tzm-Latn-utf8
  166. ug.lm ug--utf8 #ug-Arab
  167. uk.lm uk--utf8
  168. ur.lm ur--utf8
  169. uz.lm uz--utf8 #uz-Latn
  170. uz-Cyrl.lm uz-Cyrl-utf8
  171. ve.lm ve--utf8
  172. vep.lm vep--utf8
  173. vi.lm vi--utf8
  174. wa.lm wa--utf8
  175. xh.lm xh--utf8
  176. yi.lm yi--utf8
  177. yo.lm yo--utf8
  178. zh-Hant.lm zh-TW-utf8 #zh-Hant
  179. zu.lm zu--utf8