1.1 --- a/db/dic2csv.py Sun Mar 15 22:16:09 2015 +0100
1.2 +++ b/db/dic2csv.py Mon Mar 23 21:42:45 2015 +0100
1.3 @@ -24,8 +24,20 @@
1.4
1.5 args = p.parse_args()
1.6
1.7 +try:
1.8 +    from icu import UnicodeString, Locale  # optional PyICU dependency
1.9 +except ImportError:
1.10 +    print("warning: PyICU not installed, using fallback", file=sys.stderr)
1.11 +    def upper(x):  # fallback: plain str.upper(), which takes no account of args.lang
1.12 +        return x.upper()
1.13 +else:
1.14 +    locale = Locale(args.lang)  # built once, reused by every upper() call
1.15 +    def upper(x):  # uppercase x using ICU with the locale for args.lang
1.16 +        u = UnicodeString(x)
1.17 +        return str(u.toUpper(locale))
1.18 +
1.19 _all = (
1.20 - word.match(line).group(1).upper()
1.21 + upper(word.match(line).group(1))
1.22 for line in FileInput(
1.23 args.hunspell + "/" + args.lang + ".dic",
1.24 openhook=hook_encoded(args.encoding)
1.25 @@ -36,6 +48,9 @@
1.26 _words.sort()
1.27 _words = [w for w, g in itertools.groupby(_words)]
1.28
1.29 +while len(_words) > 65536 * 2:  # still more than twice the 65536 size checked just below
1.30 + _words = _words[::2]  # keep every second entry of the sorted, de-duplicated list (halves it)
1.31 +
1.32 if len(_words) > 65536:
1.33 _words = _words[:65536]
1.34 elif len(_words) < 65536: