case conversion including tailoring based on PyICU
author vb
Mon, 23 Mar 2015 21:42:45 +0100
changeset 120 c282594b523b
parent 119 cf32295fd797
child 121 dcefb3741425
case conversion including tailoring based on PyICU
TODO.txt
db/dic2csv.py
src/mime.c
     1.1 --- a/TODO.txt	Sun Mar 15 22:16:09 2015 +0100
     1.2 +++ b/TODO.txt	Mon Mar 23 21:42:45 2015 +0100
     1.3 @@ -1,2 +1,4 @@
     1.4  - check if own key is good; if not: create a new keypair
     1.5 +- warning with forwarding former encrypted messages unencryptedly
     1.6 +- remove four-letter-words from dicts
     1.7  
     2.1 --- a/db/dic2csv.py	Sun Mar 15 22:16:09 2015 +0100
     2.2 +++ b/db/dic2csv.py	Mon Mar 23 21:42:45 2015 +0100
     2.3 @@ -24,8 +24,20 @@
     2.4  
     2.5  args = p.parse_args()
     2.6  
     2.7 +try:
     2.8 +    from icu import UnicodeString, Locale
     2.9 +except ImportError:
    2.10 +    print("warning: PyICU not installed, using fallback", file=sys.stderr)
    2.11 +    def upper(x):
    2.12 +        return x.upper();
    2.13 +else:
    2.14 +    locale = Locale(args.lang)
    2.15 +    def upper(x):
    2.16 +        u = UnicodeString(x)
    2.17 +        return str(u.toUpper(locale))
    2.18 +
    2.19  _all = (
    2.20 -    word.match(line).group(1).upper()
    2.21 +    upper(word.match(line).group(1))
    2.22          for line in FileInput(
    2.23                  args.hunspell + "/" + args.lang + ".dic",
    2.24                  openhook=hook_encoded(args.encoding)
    2.25 @@ -36,6 +48,9 @@
    2.26  _words.sort()
    2.27  _words = [w for w, g in itertools.groupby(_words)]
    2.28  
    2.29 +while len(_words) > 65536 * 2:
    2.30 +    _words = _words[::2]
    2.31 +
    2.32  if len(_words) > 65536:
    2.33      _words = _words[:65536]
    2.34  elif len(_words) < 65536:
     3.1 --- a/src/mime.c	Sun Mar 15 22:16:09 2015 +0100
     3.2 +++ b/src/mime.c	Mon Mar 23 21:42:45 2015 +0100
     3.3 @@ -1211,11 +1211,92 @@
     3.4  
     3.5              clistiter *cur;
     3.6              for (cur = clist_begin(partlist); cur; cur = clist_next(cur)) {
     3.7 -                struct mailmime *part= clist_content(cur);
     3.8 +                size_t index;
     3.9 +                int r;
    3.10 +                struct mailmime *part = clist_content(cur);
    3.11                  if (part == NULL)
    3.12                      return PEP_ILLEGAL_VALUE;
    3.13  
    3.14 +                content = part->mm_content_type;
    3.15 +                assert(content);
    3.16 +                if (content == NULL)
    3.17 +                    return PEP_ILLEGAL_VALUE;
    3.18  
    3.19 +                if (content->ct_type == NULL)
    3.20 +                    return PEP_ILLEGAL_VALUE;
    3.21 +
    3.22 +                switch (content->ct_type->tp_type) {
    3.23 +                    case MAILMIME_TYPE_DISCRETE_TYPE:
    3.24 +                        if (content->ct_type->tp_data.tp_discrete_type == NULL)
    3.25 +                            return PEP_ILLEGAL_VALUE;
    3.26 +
    3.27 +                        switch (content->ct_type->tp_data.tp_discrete_type->
    3.28 +                                dt_type) {
    3.29 +                            case MAILMIME_DISCRETE_TYPE_TEXT:
    3.30 +                                if (strcmp(content->ct_subtype, "plain") ==
    3.31 +                                        0) {
    3.32 +                                    const char *text;
    3.33 +                                    size_t length;
    3.34 +
    3.35 +                                    if (part->mm_body == NULL)
    3.36 +                                        return PEP_ILLEGAL_VALUE;
    3.37 +
    3.38 +                                    text = part->mm_body->
    3.39 +                                            dt_data.dt_text.dt_data;
    3.40 +                                    length =
    3.41 +                                        part->mm_body->dt_data.dt_text.dt_length;
    3.42 +                                    index = 0;
    3.43 +                                    r = mailmime_encoded_phrase_parse(
    3.44 +                                            "utf-8", text, length, &index,
    3.45 +                                            "utf-8", &msg->longmsg);
    3.46 +                                    if (r)
    3.47 +                                        return PEP_ILLEGAL_VALUE;
    3.48 +                                }
    3.49 +                                else if (strcmp(content->ct_subtype, "html") ==
    3.50 +                                        0) {
    3.51 +                                    const char *html;
    3.52 +                                    size_t length;
    3.53 +
    3.54 +                                    if (part->mm_body == NULL)
    3.55 +                                        return PEP_ILLEGAL_VALUE;
    3.56 +
    3.57 +                                    html = part->mm_body->
    3.58 +                                            dt_data.dt_text.dt_data;
    3.59 +                                    length =
    3.60 +                                        part->mm_body->dt_data.dt_text.dt_length;
    3.61 +                                    index = 0;
    3.62 +                                    r = mailmime_encoded_phrase_parse(
    3.63 +                                            "utf-8", html, length, &index,
    3.64 +                                            "utf-8", &msg->longmsg_formatted);
    3.65 +                                    if (r)
    3.66 +                                        return PEP_ILLEGAL_VALUE;
    3.67 +                                }
    3.68 +                                else {
    3.69 +                                    return interpret_MIME(part, msg);
    3.70 +                                }
    3.71 +
    3.72 +                                break;
    3.73 +                                
    3.74 +                            case MAILMIME_DISCRETE_TYPE_IMAGE:
    3.75 +                            case MAILMIME_DISCRETE_TYPE_AUDIO:
    3.76 +                            case MAILMIME_DISCRETE_TYPE_VIDEO:
    3.77 +                            case MAILMIME_DISCRETE_TYPE_APPLICATION:
    3.78 +                            case MAILMIME_DISCRETE_TYPE_EXTENSION:
    3.79 +                                    return interpret_MIME(part, msg);
    3.80 +
    3.81 +                                break;
    3.82 +                                
    3.83 +                            default:
    3.84 +                                return PEP_ILLEGAL_VALUE;
    3.85 +                        }
    3.86 +                        break;
    3.87 +
    3.88 +                    case MAILMIME_TYPE_COMPOSITE_TYPE:
    3.89 +
    3.90 +                        break;
    3.91 +                    default:
    3.92 +                        return PEP_ILLEGAL_VALUE;
    3.93 +                }
    3.94              }
    3.95          }
    3.96          if (content->ct_type &&