ENGINE-9: rough draft of fixes, heuristic for choosing cid over filename and vice-versa. In principle it's all in, but needs testing. Lots of testing. ENGINE-9
authorKrista Bennett <krista@pep-project.org>
Tue, 20 Jun 2017 14:28:31 +0200
branchENGINE-9
changeset 18732da96adc534e
parent 1872 23b14ab93b11
child 1874 ca396bbc2124
ENGINE-9: rough draft of fixes, heuristic for choosing cid over filename and vice-versa. In principle it's all in, but needs testing. Lots of testing.
src/etpan_mime.c
src/etpan_mime.h
src/message_api.c
src/mime.c
src/sync_impl.c
test/revoke_regen_attach_test.cc
     1.1 --- a/src/etpan_mime.c	Mon Jun 19 09:32:00 2017 +0200
     1.2 +++ b/src/etpan_mime.c	Tue Jun 20 14:28:31 2017 +0200
     1.3 @@ -596,7 +596,7 @@
     1.4      return content;
     1.5  }
     1.6  
     1.7 -char* _get_uri(char* uri_prefix, char* resource) {
     1.8 +char* _build_uri(char* uri_prefix, char* resource) {
     1.9      if (!uri_prefix || !resource)
    1.10          return NULL;
    1.11      const char* delim = "://";
    1.12 @@ -613,7 +613,61 @@
    1.13      return retval;
    1.14  }
    1.15  
    1.16 +/* Return a list of identifier_type and resource id (filename, cid, etc) */
    1.17 +pEp_rid_list_t* _get_resource_id_list(struct mailmime *mime)
    1.18 +{
    1.19 +    clist * _fieldlist = NULL;
    1.20  
    1.21 +    assert(mime);
    1.22 +
    1.23 +    if (mime->mm_mime_fields && mime->mm_mime_fields->fld_list)
    1.24 +        _fieldlist = mime->mm_mime_fields->fld_list;
    1.25 +    else
    1.26 +        return NULL;
    1.27 +
    1.28 +    clistiter *cur;
    1.29 +
    1.30 +    pEp_rid_list_t* rid_list = NULL; 
    1.31 +    pEp_rid_list_t** rid_list_curr_p = &rid_list; 
    1.32 +        
    1.33 +    for (cur = clist_begin(_fieldlist); cur; cur = clist_next(cur)) {
    1.34 +        struct mailmime_field * _field = clist_content(cur);
    1.35 +        /* content_id */
    1.36 +        if (_field && _field->fld_type == MAILMIME_FIELD_ID) {
    1.37 +            pEp_rid_list_t* new_rid = (pEp_rid_list_t*)calloc(1, sizeof(pEp_rid_list_t*));
    1.38 +            new_rid->rid_type = PEP_RID_CID;
    1.39 +            new_rid->rid = _field->fld_data.fld_id;
    1.40 +            *rid_list_curr_p = new_rid;
    1.41 +            rid_list_curr_p = &new_rid->next;
    1.42 +        }
    1.43 +        else if (_field && _field->fld_type == MAILMIME_FIELD_DISPOSITION) {
    1.44 +            /* filename */
    1.45 +            if (_field->fld_data.fld_disposition &&
    1.46 +                    _field->fld_data.fld_disposition->dsp_parms) {
    1.47 +                clist * _parmlist =
    1.48 +                        _field->fld_data.fld_disposition->dsp_parms;
    1.49 +                clistiter *cur2;
    1.50 +                for (cur2 = clist_begin(_parmlist); cur2; cur2 =
    1.51 +                        clist_next(cur2)) {
    1.52 +                    struct mailmime_disposition_parm * param =
    1.53 +                            clist_content(cur2);
    1.54 +                    if (param->pa_type == MAILMIME_DISPOSITION_PARM_FILENAME) {
    1.55 +                        pEp_rid_list_t* new_rid = (pEp_rid_list_t*)calloc(1, sizeof(pEp_rid_list_t*));
    1.56 +                        new_rid->rid_type = PEP_RID_FILENAME;
    1.57 +                        new_rid->rid = param->pa_data.pa_filename;
    1.58 +                        *rid_list_curr_p = new_rid;
    1.59 +                        rid_list_curr_p = &new_rid->next;
    1.60 +                    }                
    1.61 +                }
    1.62 +            }
    1.63 +        }
    1.64 +    }
    1.65 +    /* Will almost certainly usually be a singleton, but we need to be able to decide */
    1.66 +    return rid_list;
    1.67 +}
    1.68 +
    1.69 +
    1.70 +/* FIXME: about to be obsoleted? */
    1.71  char * _get_filename_or_cid(struct mailmime *mime)
    1.72  {
    1.73      clist * _fieldlist = NULL;
    1.74 @@ -626,11 +680,20 @@
    1.75          return NULL;
    1.76  
    1.77      clistiter *cur;
    1.78 +    
    1.79 +    char* _temp_filename_ptr = NULL;
    1.80 +    
    1.81      for (cur = clist_begin(_fieldlist); cur; cur = clist_next(cur)) {
    1.82          struct mailmime_field * _field = clist_content(cur);
    1.83 -        if (_field && _field->fld_type == MAILMIME_FIELD_DISPOSITION) {
    1.84 +        if (_field && _field->fld_type == MAILMIME_FIELD_ID) {
    1.85 +            /* We prefer CIDs to filenames when both are present */
    1.86 +            free(_temp_filename_ptr); /* can be null, it's ok */
    1.87 +            return _build_uri("cid", _field->fld_data.fld_id); 
    1.88 +        }
    1.89 +        else if (_field && _field->fld_type == MAILMIME_FIELD_DISPOSITION) {
    1.90              if (_field->fld_data.fld_disposition &&
    1.91 -                    _field->fld_data.fld_disposition->dsp_parms) {
    1.92 +                    _field->fld_data.fld_disposition->dsp_parms &&
    1.93 +                    !_temp_filename_ptr) {
    1.94                  clist * _parmlist =
    1.95                          _field->fld_data.fld_disposition->dsp_parms;
    1.96                  clistiter *cur2;
    1.97 @@ -638,16 +701,16 @@
    1.98                          clist_next(cur2)) {
    1.99                      struct mailmime_disposition_parm * param =
   1.100                              clist_content(cur2);
   1.101 -                    if (param->pa_type == MAILMIME_DISPOSITION_PARM_FILENAME)
   1.102 -                        return _get_uri("file", param->pa_data.pa_filename);
   1.103 +                    if (param->pa_type == MAILMIME_DISPOSITION_PARM_FILENAME) {
   1.104 +                        _temp_filename_ptr = _build_uri("file", param->pa_data.pa_filename);
   1.105 +                        break;
   1.106 +                    }                
   1.107                  }
   1.108              }
   1.109          }
   1.110 -        else if (_field && _field->fld_type == MAILMIME_FIELD_ID) 
   1.111 -            return _get_uri("cid", _field->fld_data.fld_id); 
   1.112      }
   1.113 -
   1.114 -    return NULL;
   1.115 +    /* Ok, it wasn't a CID */
   1.116 +    return _temp_filename_ptr;
   1.117  }
   1.118  
   1.119  static bool parameter_has_value(
     2.1 --- a/src/etpan_mime.h	Mon Jun 19 09:32:00 2017 +0200
     2.2 +++ b/src/etpan_mime.h	Tue Jun 20 14:28:31 2017 +0200
     2.3 @@ -7,6 +7,20 @@
     2.4  #include <libetpan/mailmime.h>
     2.5  #include <libetpan/mailmime_encode.h>
     2.6  
     2.7 +/* structs to contain info about parsed resource ids (filenames, uids) */
     2.8 +typedef enum _resource_id_type {
     2.9 +    PEP_RID_FILENAME,
    2.10 +    PEP_RID_CID
    2.11 +} pEp_resource_id_type;
    2.12 +
    2.13 +typedef struct pEp_rid_list_t pEp_rid_list_t;
    2.14 +
    2.15 +struct pEp_rid_list_t {
    2.16 +    pEp_resource_id_type rid_type;
    2.17 +    char* rid;
    2.18 +    pEp_rid_list_t* next;    
    2.19 +};
    2.20 +
    2.21  struct mailmime * part_new_empty(
    2.22          struct mailmime_content * content,
    2.23          struct mailmime_fields * mime_fields,
    2.24 @@ -71,6 +85,8 @@
    2.25  clist * _get_fields(struct mailmime * mime);
    2.26  struct mailmime_content * _get_content(struct mailmime * mime);
    2.27  char * _get_filename_or_cid(struct mailmime *mime);
    2.28 +pEp_rid_list_t* _get_resource_id_list(struct mailmime *mime);
    2.29 +char* _build_uri(char* uri_prefix, char* resource);
    2.30  bool _is_multipart(struct mailmime_content *content, const char *subtype);
    2.31  bool _is_PGP_MIME(struct mailmime_content *content);
    2.32  bool _is_text_part(struct mailmime_content *content, const char *subtype);
     3.1 --- a/src/message_api.c	Mon Jun 19 09:32:00 2017 +0200
     3.2 +++ b/src/message_api.c	Tue Jun 20 14:28:31 2017 +0200
     3.3 @@ -20,6 +20,29 @@
     3.4  #define _MAX(A, B) ((B) > (A) ? (B) : (A))
     3.5  #endif
     3.6  
     3.7 +static char* _get_resource_ptr_noown(char* uri) {
     3.8 +    char* uri_delim = strstr(uri, "://");
     3.9 +    if (!uri_delim)
    3.10 +        return uri;
    3.11 +    else
    3.12 +        return uri + 3;
    3.13 +}
    3.14 +
    3.15 +static char* get_resource(char* uri) {
    3.16 +    const char* resource_ptr = _get_resource_ptr_noown(uri);
    3.17 +    char* resource_str = NULL;
    3.18 +    if (resource_ptr)
    3.19 +        resource_str = strdup(resource_ptr);
    3.20 +    return resource_str;
    3.21 +}
    3.22 +
    3.23 +static bool is_file_uri(char* str) {
    3.24 +    return(strncmp(str, "file://", 7) == 0);
    3.25 +}
    3.26 +
    3.27 +static bool is_cid_uri(const char* str) {
    3.28 +    return(strncmp(str, "cid://", 6) == 0);
    3.29 +}
    3.30  
    3.31  static bool string_equality(const char *s1, const char *s2)
    3.32  {
    3.33 @@ -48,7 +71,7 @@
    3.34  {
    3.35      assert(fe);
    3.36  
    3.37 -    if (bl == NULL || bl->filename == NULL || fe == NULL)
    3.38 +    if (bl == NULL || bl->filename == NULL || fe == NULL || is_cid_uri(bl->filename))
    3.39          return false;
    3.40  
    3.41      assert(bl && bl->filename);
    3.42 @@ -427,7 +450,7 @@
    3.43      dst->attachments = _a;
    3.44  
    3.45      _a = bloblist_add(_a, ctext, csize, "application/octet-stream",
    3.46 -        "msg.asc");
    3.47 +        "file://msg.asc");
    3.48      if (_a == NULL)
    3.49          goto enomem;
    3.50  
    3.51 @@ -528,7 +551,7 @@
    3.52          if (ctext) {
    3.53  
    3.54              bloblist_t *_a = bloblist_add(dst->attachments, ctext, csize,
    3.55 -                "application/octet-stream", "PGPexch.htm.pgp");
    3.56 +                "application/octet-stream", "file://PGPexch.htm.pgp");
    3.57              if (_a == NULL)
    3.58                  goto enomem;
    3.59              if (dst->attachments == NULL)
    3.60 @@ -567,24 +590,34 @@
    3.61                  if (ctext) {
    3.62                      char *filename = NULL;
    3.63  
    3.64 -                    if (_s->filename) {
    3.65 +                    char *attach_fn = _s->filename;
    3.66 +                    if (attach_fn && !is_cid_uri(attach_fn)) {
    3.67                          size_t len = strlen(_s->filename);
    3.68                          size_t bufsize = len + 5; // length of .pgp extension + NUL
    3.69 +                        bool already_uri = false;
    3.70 +                        if (is_file_uri(attach_fn))
    3.71 +                            already_uri = true;
    3.72 +                        else
    3.73 +                            bufsize += 7; // length of file://
    3.74 +                            
    3.75                          filename = calloc(1, bufsize);
    3.76                          if (filename == NULL)
    3.77                              goto enomem;
    3.78  
    3.79 -                        strlcpy(filename, _s->filename, bufsize);
    3.80 +                        if (!already_uri)
    3.81 +                            strlcpy(filename, "file://", bufsize);
    3.82 +                        // First char is NUL, so we're ok, even if not copying above. (calloc)
    3.83 +                        strlcat(filename, _s->filename, bufsize);
    3.84                          strlcat(filename, ".pgp", bufsize);
    3.85                      }
    3.86                      else {
    3.87 -                        filename = calloc(1, 20);
    3.88 +                        filename = calloc(1, 27);
    3.89                          if (filename == NULL)
    3.90                              goto enomem;
    3.91  
    3.92                          ++n;
    3.93                          n &= 0xffff;
    3.94 -                        snprintf(filename, 20, "Attachment%d.pgp", n);
    3.95 +                        snprintf(filename, 20, "file://Attachment%d.pgp", n);
    3.96                      }
    3.97  
    3.98                      _d = bloblist_add(_d, ctext, csize, "application/octet-stream",
    3.99 @@ -736,7 +769,7 @@
   3.100  {
   3.101      assert(blob);
   3.102  
   3.103 -    if (blob == NULL || blob->filename == NULL)
   3.104 +    if (blob == NULL || blob->filename == NULL || is_cid_uri(blob->filename))
   3.105          return false;
   3.106  
   3.107      char *ext = strrchr(blob->filename, '.');
   3.108 @@ -760,12 +793,13 @@
   3.109  {
   3.110      assert(blob);
   3.111      assert(blob->filename);
   3.112 -    if (blob == NULL || blob->filename == NULL)
   3.113 +    if (blob == NULL || blob->filename == NULL || is_cid_uri(blob->filename))
   3.114          return false;
   3.115  
   3.116 -    if (strncmp(blob->filename, "PGPexch.htm.", 12) == 0) {
   3.117 -        if (strcmp(blob->filename + 11, ".pgp") == 0 ||
   3.118 -            strcmp(blob->filename + 11, ".asc") == 0)
   3.119 +    const char* bare_filename_ptr = _get_resource_ptr_noown(blob->filename);
   3.120 +    if (strncmp(bare_filename_ptr, "PGPexch.htm.", 12) == 0) {
   3.121 +        if (strcmp(bare_filename_ptr + 11, ".pgp") == 0 ||
   3.122 +            strcmp(bare_filename_ptr + 11, ".asc") == 0)
   3.123              return true;
   3.124      }
   3.125  
   3.126 @@ -775,7 +809,7 @@
   3.127  static char * without_double_ending(const char *filename)
   3.128  {
   3.129      assert(filename);
   3.130 -    if (filename == NULL)
   3.131 +    if (filename == NULL || is_cid_uri(filename))
   3.132          return NULL;
   3.133  
   3.134      char *ext = strrchr(filename, '.');
   3.135 @@ -1020,7 +1054,7 @@
   3.136      assert(size);
   3.137  
   3.138       bloblist_t *bl = bloblist_add(msg->attachments, keydata, size, "application/pgp-keys",
   3.139 -                      "pEpkey.asc");
   3.140 +                      "file://pEpkey.asc");
   3.141  
   3.142      if (msg->attachments == NULL && bl)
   3.143          msg->attachments = bl;
     4.1 --- a/src/mime.c	Mon Jun 19 09:32:00 2017 +0200
     4.2 +++ b/src/mime.c	Tue Jun 20 14:28:31 2017 +0200
     4.3 @@ -559,6 +559,39 @@
     4.4      return status;
     4.5  }
     4.6  
     4.7 +static bool has_exceptional_extension(char* filename) {
     4.8 +    if (!filename)
     4.9 +        return false;
    4.10 +    int len = strlen(filename);
    4.11 +    if (len < 4)
    4.12 +        return false;
    4.13 +    char* ext_start = filename + (len - 4);
    4.14 +    if (strcmp(ext_start, ".pgp") == 0 || strcmp(ext_start, ".gpg") == 0 ||
    4.15 +        strcmp(ext_start, ".asc") == 0 || strcmp(ext_start, ".pEp") == 0)
    4.16 +        return true;
    4.17 +    return false;
    4.18 +}
    4.19 +
    4.20 +static pEp_rid_list_t* choose_resource_id(pEp_rid_list_t* rid_list) {
    4.21 +    pEp_rid_list_t* retval = NULL;
    4.22 +    
    4.23 +    /* multiple elements - least common case */
    4.24 +    if (rid_list && rid_list->next) {
    4.25 +        pEp_rid_list_t* rid_list_curr = rid_list;
    4.26 +        retval = rid_list; 
    4.27 +        
    4.28 +        while (rid_list_curr) {
    4.29 +            pEp_resource_id_type rid_type = rid_list_curr->rid_type;
    4.30 +            if (rid_type == PEP_RID_CID)
    4.31 +                retval = rid_list_curr;
    4.32 +            else if (rid_type == PEP_RID_FILENAME && has_exceptional_extension(rid_list_curr->rid))
    4.33 +                return rid_list_curr;
    4.34 +            rid_list_curr = rid_list_curr->next;
    4.35 +        }
    4.36 +    } 
    4.37 +    return retval;
    4.38 +}
    4.39 +
    4.40  static PEP_STATUS mime_encode_message_plain(
    4.41          const message *msg,
    4.42          bool omit_fields,
    4.43 @@ -1343,16 +1376,43 @@
    4.44                  if (status)
    4.45                      return status;
    4.46  
    4.47 -                filename = _get_filename_or_cid(mime);
    4.48 +                pEp_rid_list_t* resource_id_list = _get_resource_id_list(mime);
    4.49 +                pEp_rid_list_t* chosen_resource_id = choose_resource_id(resource_id_list);
    4.50 +                
    4.51 +                //filename = _get_filename_or_cid(mime);
    4.52                  char *_filename = NULL;
    4.53 -                if (filename) {
    4.54 +                
    4.55 +                if (chosen_resource_id) {
    4.56 +                    filename = chosen_resource_id->rid;
    4.57                      size_t index = 0;
    4.58 +                    /* NOTA BENE */
    4.59 +                    /* The prefix we just added shouldn't be a problem - this is about decoding %XX (RFC 2392) */
    4.60 +                    /* If it becomes one, we have some MESSY fixing to do. :(                                  */
    4.61                      r = mailmime_encoded_phrase_parse("utf-8", filename,
    4.62                              strlen(filename), &index, "utf-8", &_filename);
    4.63 -                    free(filename); /* new - _get_filename returns malloc'd str */
    4.64                      if (r) {
    4.65                          goto enomem;
    4.66                      }
    4.67 +                    char* file_prefix = NULL;
    4.68 +                    
    4.69 +                    /* in case there are others later */
    4.70 +                    switch (chosen_resource_id->rid_type) {
    4.71 +                        case PEP_RID_CID:
    4.72 +                            file_prefix = "cid";
    4.73 +                            break;
    4.74 +                        case PEP_RID_FILENAME:
    4.75 +                            file_prefix = "file";
    4.76 +                            break;
    4.77 +                        default:
    4.78 +                            break;
    4.79 +                    }
    4.80 +
    4.81 +                    
    4.82 +                    if (file_prefix) {
    4.83 +                        filename = _build_uri(file_prefix, _filename);
    4.84 +                        free(_filename);
    4.85 +                        _filename = filename;
    4.86 +                    }
    4.87                  }
    4.88  
    4.89                  bloblist_t *_a = bloblist_add(msg->attachments, data, size,
     5.1 --- a/src/sync_impl.c	Mon Jun 19 09:32:00 2017 +0200
     5.2 +++ b/src/sync_impl.c	Tue Jun 20 14:28:31 2017 +0200
     5.3 @@ -851,7 +851,7 @@
     5.4              for (stringlist_t *_keylist=keylist; _keylist!=NULL; _keylist=_keylist->next) {
     5.5                  char *fpr = _keylist->value;
     5.6                  static char filename[MAX_LINELENGTH];
     5.7 -                int result = snprintf(filename, MAX_LINELENGTH, "%s-sec.asc", fpr);
     5.8 +                int result = snprintf(filename, MAX_LINELENGTH, "file://%s-sec.asc", fpr);
     5.9                  if (result < 0)
    5.10                      goto enomem;
    5.11                  char *key = NULL;
    5.12 @@ -969,4 +969,3 @@
    5.13  
    5.14      return group_key_extra_dst;
    5.15  }
    5.16 -
     6.1 --- a/test/revoke_regen_attach_test.cc	Mon Jun 19 09:32:00 2017 +0200
     6.2 +++ b/test/revoke_regen_attach_test.cc	Tue Jun 20 14:28:31 2017 +0200
     6.3 @@ -68,8 +68,8 @@
     6.4  
     6.5      cout << msg->attachments->filename;
     6.6      assert(bloblist_length(msg->attachments) == 2);
     6.7 -    assert(strcmp(msg->attachments->filename, "pEpkey.asc") == 0);
     6.8 -    assert(strcmp(msg->attachments->next->filename, "pEpkey.asc") == 0);
     6.9 +    assert(strcmp(msg->attachments->filename, "file://pEpkey.asc") == 0);
    6.10 +    assert(strcmp(msg->attachments->next->filename, "file://pEpkey.asc") == 0);
    6.11  
    6.12      cout << "message contains 2 key attachements.\n";
    6.13  
    6.14 @@ -82,4 +82,3 @@
    6.15  
    6.16      return 0;
    6.17  }
    6.18 -