file generation and inclusion in engine. still a ways from done. peg-mime
author Krista Grothoff <krista@pep-project.org>
Mon, 19 Sep 2016 00:40:37 +0200
branch peg-mime
changeset 1178 335ac741184f
parent 1177 2d35f26558c2
child 1179 bb7d1ad3c543
file generation and inclusion in engine. still a ways from done.
Makefile
mime-parser/Makefile
mime-parser/mailmime.peg
src/mailmime.c
src/mailmime.h
     1.1 --- a/Makefile	Thu Sep 15 14:57:43 2016 +0200
     1.2 +++ b/Makefile	Mon Sep 19 00:40:37 2016 +0200
     1.3 @@ -8,6 +8,7 @@
     1.4  	$(MAKE) -C asn.1 generate
     1.5  	$(MAKE) -C asn.1
     1.6  	$(MAKE) -C sync
     1.7 +	$(MAKE) -C mime-parser
     1.8  	$(MAKE) -C src all
     1.9  
    1.10  .PHONY: clean build_test test package install uninstall db
    1.11 @@ -23,6 +24,7 @@
    1.12  	$(MAKE) -C test clean
    1.13  	$(MAKE) -C db clean
    1.14  	$(MAKE) -C sync clean
    1.15 +	$(MAKE) -C mime-parser clean
    1.16  	$(MAKE) -C asn.1 clean
    1.17  
    1.18  test: all
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/mime-parser/Makefile	Mon Sep 19 00:40:37 2016 +0200
     2.3 @@ -0,0 +1,12 @@
     2.4 +include ../Makefile.conf
     2.5 +# Generates the MIME parser source from mailmime.peg and copies it into ../src.
     2.6 +all: generate
     2.7 +	cp mailmime.peg.c.src ../src/.
     2.8 +# Runs the peg parser generator; the output name must match what 'all' copies and 'clean' removes.
     2.9 +generate: 
    2.10 +	peg mailmime.peg -o mailmime.peg.c.src
    2.11 +
    2.12 +.PHONY: all generate clean
    2.13 +# Remove the generated parser source (plus the name the earlier clean rule used).
    2.14 +clean:
    2.15 +	rm -f mailmime.peg.c mailmime.peg.c.src
     3.1 --- a/mime-parser/mailmime.peg	Thu Sep 15 14:57:43 2016 +0200
     3.2 +++ b/mime-parser/mailmime.peg	Mon Sep 19 00:40:37 2016 +0200
     3.3 @@ -1,23 +1,96 @@
     3.4  Mail                    <-      ( Header )+ CRLF ( Body )? EOF
     3.5  
     3.6  # As pretty as it would be to make the header parse elegant, 
     3.7 -# the parse semantics work better this way
     3.8 +# the parse semantics work better this way.
     3.9 +# Marginally, a header is HeaderKey COLON HeaderValue (with some
    3.10 +# lines possible afterwards), but we have some specific headers that
    3.11 +# have meaning, so they are explicit here.
    3.12  Header                  <-      MIMEHeader / MessageHeader / GenericHeader
    3.13  
    3.14 +GenericHeader           <-      HeaderKey COLON HeaderText
    3.15 +HeaderKey               <-      ( Alpha / Digit / OrdinarySymbol )+
    3.16 +HeaderText              <-      HeaderLine ( HeaderCont )*
    3.17 +HeaderCont              <-      Whitespace HeaderLine
    3.18 +HeaderLine              <-      Text* CRLF
    3.19 +
    3.20 +#MIME Header Blocks
    3.21 +
    3.22 +# MessageHeaders      <-      (EntityHeaderBlock Fields MIMEVersionHeader) /
    3.23 +#                             (EntityHeaderBlock MIMEVersionHeader Fields) /
    3.24 +#                             (MIMEVersionHeader Fields EntityHeaderBlock) /
    3.25 +#                             (MIMEVersionHeader EntityHeaderBlock Fields) /
    3.26 +#                             (Fields EntityHeaderBlock MIMEVersionHeader) /
    3.27 +#                             (Fields MIMEVersionHeader EntityHeaderBlock)
    3.28 +
    3.29 +# Horrible def.
    3.30 +# MessagePartHeaders         <-      Field* EntityHeaderBlock Field* 
    3.31 +
    3.32 +#MIMEMessageHeaderBlock  <-      EntityHeaderBlock   
    3.33 +#                                (Field* EntityHeaderBlock)
    3.34 +
    3.35 +# Not really clear if this ordering matters. Sigh.
    3.36 +#EntityHeaderBlock       <-      MIMEContentTypeHeader? MIMETransfEncodeHeader? MIMEContentIDHeader? MIMEContentDescHeader? MIMEExtensionHeader* 
    3.37 +
    3.38 +#Fields                  <-      MessageHeader / GenericHeader
    3.39 +
    3.40 +# Mime-related headers
    3.41  MIMEHeader              <-      MIMEVersionHeader /
    3.42 +                                MIMEContentIDHeader /
    3.43                                  MIMEContentTypeHeader /
    3.44 -                                MIMEContentDispoHeader /
    3.45 -                                MIMETransfEncodeHeader
    3.46 +                                MIMETransfEncodeHeader /
    3.47 +                                MIMEContentDescHeader /
    3.48 +                                MIMEExtensionHeader
    3.49 +
    3.50 +
    3.51 +MIMEExtensionHeader     <-      MIMEContentDispoHeader / MIMEGenericExtHeader
    3.52  MIMEVersionHeader       <-      MIMEVersionKey COLON VersionString
    3.53  MIMEContentTypeHeader   <-      MIMEContentTypeKey COLON ContentTypeString
    3.54 +MIMEContentIDHeader     <-      MIMEContentIDKey COLON ContentIDString
    3.55 +MIMETransfEncodeHeader  <-      MIMETransfEncodeKey COLON TransfEncodeString
    3.56 +MIMEContentDescHeader   <-      MIMEContentDescKey COLON ContentDescString
    3.57 +
    3.58  MIMEContentDispoHeader  <-      MIMEContentDispoKey COLON ContentDispoString
    3.59 -MIMETransfEncodeHeader  <-      MIMETransfEncodeKey COLON TransfEncodeString
    3.60 +MIMEGenericExtHeader    <-      MIMEExtensionKey COLON ExtensionString
    3.61  
    3.62 +MIMEVersionKey          <-      'MIME-Version'
    3.63 +MIMEContentTypeKey      <-      'Content-Type'
    3.64 +MIMEContentDispoKey     <-      'Content-Disposition'
    3.65 +MIMETransfEncodeKey     <-      'Content-Transfer-Encoding'
    3.66 +MIMEContentIDKey        <-      'Content-ID'
    3.67 +MIMEContentDescKey      <-      'Content-Description'
    3.68 +MIMEExtensionKey        <-      'Content-'HeaderKey
    3.69 +
    3.70 +#VersionString           <-      Text* Digit Text* "." Text* Digit Text* 
    3.71  VersionString           <-      HeaderText
    3.72 -ContentTypeString       <-      HeaderText
    3.73 +ContentTypeString       <-      ContentType "/" ContentSubtype (";" Parameter)* # NOTE(review): consumes no whitespace and no CRLF, unlike the HeaderText-based value rules - confirm intended
    3.74  ContentDispoString      <-      HeaderText
    3.75 -TransfEncodeString      <-      HeaderText
    3.76 +TransfEncodeString      <-      "7bit" / "8bit" / "binary" /
    3.77 +                                "quoted-printable" / "base64" /
    3.78 +                                IETFToken / XToken # NOTE(review): IETFToken (Text) is tried first and Text already accepts "X-..." input, so XToken looks unreachable; no CRLF is consumed here either - confirm
    3.79 +ContentIDString         <-      HeaderText
    3.80 +ContentDescString       <-      HeaderText
    3.81 +ExtensionString         <-      HeaderText
    3.82  
    3.83 +# RFC2045 Section 5 - we should extend these to ones we care about
    3.84 +ContentType             <-      DiscreteContentType / CompositeContentType
    3.85 +DiscreteContentType     <-      "text" / "image" / "audio" / 
    3.86 +                                "video" / "application" / 
    3.87 +                                ExtensionToken
    3.88 +CompositeContentType    <-      "message" / "multipart" / 
    3.89 +                                ExtensionToken
    3.90 +ExtensionToken          <-      XToken / IETFToken
    3.91 +XToken                  <-      "X-" Text
    3.92 +IETFToken               <-      Text # NOTE(review): Text also accepts Space, "/" and ";" (via Symbol), so this can run past the type/subtype separator - confirm
    3.93 +ContentSubtype          <-      ExtensionToken / IANAToken
    3.94 +IANAToken               <-      Text
    3.95 +Parameter               <-      Attribute "=" Value #attribute matching always case insensitive
    3.96 +Attribute               <-      Token
    3.97 +Value                   <-      Token / QuotedString
    3.98 +Token                   <-      ( [a-zA-Z0-9] / SymbolNoTSpecials )+ # RFC 2045 token: letters and digits are legal too, not only the non-tspecial symbols
    3.99 +
   3.100 +
   3.101 +
   3.102 +# Message-related headers (per RFC2822)
   3.103  MessageHeader           <-      MessageIDHeader /
   3.104                                  MessageToHeader /
   3.105                                  MessageFromHeader /
   3.106 @@ -26,8 +99,8 @@
   3.107                                  MessageSubjHeader /
   3.108                                  MessageReplyToHeader /
   3.109                                  MessageInReplyToHeader /
   3.110 -                                MessageRefsHeader                            
   3.111 -
   3.112 +                                MessageRefsHeader      
   3.113 +                                
   3.114  MessageIDHeader         <-      MessageIDKey COLON IDString
   3.115  MessageToHeader         <-      MessageToKey COLON ToString
   3.116  MessageFromHeader       <-      MessageFromKey COLON FromString
   3.117 @@ -37,7 +110,15 @@
   3.118  MessageReplyToHeader    <-      MessageReplyToKey COLON ReplyToString
   3.119  MessageInReplyToHeader  <-      MessageInReplyToKey COLON InReplyToString
   3.120  MessageRefsHeader       <-      MessageRefsKey COLON RefsString
   3.121 -
   3.122 +MessageIDKey            <-      'Message-ID'
   3.123 +MessageToKey            <-      'To'
   3.124 +MessageFromKey          <-      'From'
   3.125 +MessageCCKey            <-      'Cc'
   3.126 +MessageBCCKey           <-      'Bcc'
   3.127 +MessageSubjKey          <-      'Subject'
   3.128 +MessageReplyToKey       <-      'Reply-To'
   3.129 +MessageInReplyToKey     <-      'In-Reply-To'
   3.130 +MessageRefsKey          <-      'References'  
   3.131  IDString                <-      HeaderText
   3.132  ToString                <-      HeaderText
   3.133  FromString              <-      HeaderText
   3.134 @@ -49,37 +130,51 @@
   3.135  RefsString              <-      HeaderText
   3.136  
   3.137  
   3.138 -MIMEVersionKey          <-      'MIME-Version'
   3.139 -MIMEContentTypeKey      <-      'Content-Type'
   3.140 -MIMEContentDispoKey     <-      'Content-Disposition'
   3.141 -MIMETransfEncodeKey     <-      'Content-Transfer-Encoding'
   3.142 -MessageIDKey            <-      'Message-ID'
   3.143 -MessageToKey            <-      'To'
   3.144 -MessageFromKey          <-      'From'
   3.145 -MessageCCKey            <-      'Cc'
   3.146 -MessageBCCKey           <-      'Bcc'
   3.147 -MessageSubjKey          <-      'Subject'
   3.148 -MessageReplyToKey       <-      'Reply-To'
   3.149 -MessageInReplyToKey     <-      'In-Reply-To'
   3.150 -MessageRefsKey          <-      'References'  
   3.151 -                            
   3.152 -GenericHeader       <-      HeaderKey COLON HeaderText
   3.153 -HeaderKey           <-      ( Alpha / Digit / OrdinarySymbol )+
   3.154 -HeaderText          <-      HeaderLine ( HeaderCont )*
   3.155 -HeaderCont          <-      Whitespace HeaderLine
   3.156 -HeaderLine          <-      Text* CRLF
   3.157 -
   3.158 +                        
   3.159  Body                <-      PlainBody
   3.160  PlainBody           <-      .* EOF
   3.161 +
   3.162 +Encapsulation       <-      Delimiter BodyPart CRLF
   3.163 +CloseDelimiter      <-      Delimiter "--"
   3.164 +Delimiter           <-      CRLF DashBoundary
   3.165 +BodyPart            <-      MIMEHeader+ (CRLF OCTET*)?
   3.166 +DashBoundary        <-      "--" Boundary
   3.167 +Boundary            <-      < ( " "* BCharsNoSpace )+ > # RFC 2046: any run of bchars whose last character is not a space
   3.168 +BChars              <-      BCharsNoSpace / " "
   3.169 +BCharsNoSpace       <-      [0-9a-zA-Z] / "'" / "(" / ")" /
   3.170 +                            "+" / "_" / "," / "-" / "." /
   3.171 +                            "/" / ":" / "=" / "?"
   3.172 +Preamble            <-      DiscardText
   3.173 +Epilogue            <-      DiscardText
   3.174 +DiscardText         <-      ((!CRLF .)* CRLF)* (!CRLF .)* # consume one character per step; a bare !CRLF never advances, so repeating it cannot terminate
   3.175 +
   3.176 +CContent            <-      CText / QuotedPair / Comment
   3.177 +CFWS                <-      (FoldingWhiteSpace? Comment)+ FoldingWhiteSpace? / FoldingWhiteSpace # PEG ordering of RFC 2822 CFWS: a greedy comment loop must not be followed by a mandatory extra comment
   3.178 +Comment             <-      "(" (FoldingWhiteSpace? CContent)* FoldingWhiteSpace? ")" # FoldingWhiteSpace is a rule reference here, not a [character class]
   3.179 +FoldingWhiteSpace   <-      (Whitespace* CRLF)? Whitespace+ /   
   3.180 +                            Whitespace+ (CRLF Whitespace+)*
   3.181 +CText               <-      NoWSCtl / 
   3.182 +                            Alpha / Digit / CommentSymbols
   3.183 +QuotedString        <-      CFWS? '"' (FoldingWhiteSpace? QuotedContent)* FoldingWhiteSpace? '"' # leading CFWS is optional (RFC 2822), so name="value" can match
   3.184 +QuotedContent       <-      QText / QuotedPair
   3.185 +QText               <-      !( '"' / "\\" / CR ) . # exactly one character other than DQUOTE, backslash or CR; the lookahead alone consumes nothing
   3.186 +QuotedPair          <-      "\\". / "\\" Text
   3.187 +
   3.188  Text                <-      ( Alpha / Digit / Symbol / Space )+
   3.189  Alpha               <-      [a-zA-Z]
   3.190 -Digit               <-      [0-9]
   3.191 +Digit               <-      [0-9] # exactly one digit: an empty match would stall every ( ... )+ and ( ... )* built on this rule
   3.192  Symbol              <-      OrdinarySymbol / COLON
   3.193  Whitespace          <-      Space+
   3.194  Space               <-      [' ' '\t']
   3.195 -OrdinarySymbol      <-      ["!" "#" "$" "%" "&" "'" "*" "+" "\-" "/" "\" "=" "?" "^" "_" "`" "{" "|" "}" "~" "\[" "\]" "@" ";" "." "<" ">" "\"" "(" ")" "," "\\"]
   3.196 +
   3.197 +OrdinarySymbol      <-      ["!" "#" "$" "%" "&" "'" "*" "+" "\-" "/" "=" "?" "^" "_" "`" "{" "|" "}" "~" "\[" "\]" "@" ";" "." "<" ">" "\"" "(" ")" "," "\\"]
   3.198 +SymbolNoTSpecials   <-      [!#$%&\'*+\-.^_`{|}~] # members are listed bare: quotes and spaces inside [] are themselves members, and an unescaped - forms a range
   3.199 +CommentSymbols      <-      [\041-\047] / [\052-\057] / [\072-\100] / [\133] / [\135-\140] / [\173-\177] # every range endpoint needs its own three-digit octal escape
   3.200 +
   3.201 +OCTET               <-      '\\' (([0-1] [0-9] [0-9]) / ("2" (([0-4] [0-9]) / ("5" [0-5])))) # a literal backslash followed by a three-digit decimal 000-255; NOTE(review): presumably stands in for RFC 2046 OCTET (any byte) - confirm
   3.202  COLON               <-      ':'
   3.203  NEWLINE             <-      '\n'
   3.204  CR                  <-      '\r'
   3.205  CRLF                <-      CR CR? NEWLINE 
   3.206 -EOF                 <-      !.
   3.207 \ No newline at end of file
   3.208 +EOF                 <-      !.
   3.209 +NoWSCtl             <-      [\001-\010] / [\013-\014] / [\016-\037] / [\177]
   3.210 \ No newline at end of file
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/src/mailmime.c	Mon Sep 19 00:40:37 2016 +0200
     4.3 @@ -0,0 +1,29 @@
     4.4 +#include <stdio.h>
     4.5 +#include <stdlib.h>
     4.6 +#include <assert.h>
     4.7 +#include "mailmime.h"
     4.8 +#include "mailmime.peg.c.src"
     4.9 +#include "pEpEngine.h"
    4.10 +
    4.11 +/* Run the generated PEG parser over the NUL-terminated mimetext; *msg is reset to NULL (no message is built yet). */
    4.12 +PEP_STATUS parse_mailmessage(const char *mimetext,
    4.13 +                         message **msg) {
    4.14 +    assert(mimetext);
    4.15 +    assert(msg);
    4.16 +
    4.17 +    if (!(mimetext && msg))
    4.18 +        return PEP_ILLEGAL_VALUE;
    4.19 +
    4.20 +    *msg = NULL;
    4.21 +
    4.22 +    yycontext ctx;
    4.23 +    memset(&ctx, 0, sizeof(yycontext));
    4.24 +    /* input_str is declared char* (YY_CTX_MEMBERS); YY_INPUT only reads through it, so the cast is safe. */
    4.25 +    ctx.input_str = (char *) mimetext;
    4.26 +    while (yyparse(&ctx));
    4.27 +    yyrelease(&ctx); /* return the buffers the parser allocated for this local context */
    4.28 +
    4.29 +    return PEP_STATUS_OK;
    4.30 +}
    4.31 +
    4.32 +
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/src/mailmime.h	Mon Sep 19 00:40:37 2016 +0200
     5.3 @@ -0,0 +1,74 @@
     5.4 +#ifndef MAILMIME_H
     5.5 +#define MAILMIME_H
     5.6 +
     5.7 +#include "pEpEngine.h"
     5.8 +#include "message.h"
     5.9 +
    5.10 +#define YY_DEBUG
    5.11 +#define YY_CTX_LOCAL
    5.12 +#define YY_CTX_MEMBERS char*   input_str;
    5.13 +
    5.14 +#define YY_INPUT(yycontext, buf, result, max_size)         \
    5.15 +{   /* one byte per call; the cursor stays on the NUL so later refills cannot read past the string */ \
    5.16 +    int yyc = *(yycontext->input_str);           \
    5.17 +    result= (!yyc) ? 0 : ((yycontext->input_str)++, *(buf)= yyc, 1);      \
    5.18 +    yyprintf((stderr, "<%c>", yyc));			\
    5.19 +}
    5.20 +
    5.21 +struct _pEpMailMime;
    5.22 +typedef struct _pEpMailMime pEp_mailmime;
    5.23 +
    5.24 +/* Top-level MIME content types. These are in no way comprehensive - these are the ones we may act on. */
    5.25 +typedef enum {
    5.26 +    CONTENT_MESSAGE         =           0x100, /* 0x1xx: the composite types the grammar names ("message", "multipart") */
    5.27 +    CONTENT_MULTIPART       =           0x101,
    5.28 +    CONTENT_COMPOSITE_OTHER =           0x10F,
    5.29 +    
    5.30 +    /* IANA media types */
    5.31 +    CONTENT_APPLICATION     =           0x200,
    5.32 +    CONTENT_AUDIO           =           0x201,
    5.33 +    CONTENT_EXAMPLE         =           0x202,
    5.34 +    CONTENT_IMAGE           =           0x203,
    5.35 +    CONTENT_MODEL           =           0x205, /* NOTE(review): 0x204 is skipped between IMAGE and MODEL - confirm intentional */
    5.36 +    CONTENT_TEXT            =           0x206,
    5.37 +    CONTENT_VIDEO           =           0x207,
    5.38 +    
    5.39 +    CONTENT_OTHER           =           0xFFF
    5.40 +} MIME_content_type;
    5.41 +
    5.42 +/* MIME content subtypes, grouped by value range. These are in no way comprehensive - these are the ones we may act on. */
    5.43 +typedef enum {
    5.44 +    /* MULTIPART SUBTYPES */
    5.45 +    SUBTYPE_MIXED           =           0x100,
    5.46 +    SUBTYPE_DIGEST          =           0x101,
    5.47 +    SUBTYPE_RFC822          =           0x102, /* NOTE(review): rfc822 is a message/ subtype in RFC 2046, not multipart/ - confirm grouping */
    5.48 +    SUBTYPE_ALTERNATIVE     =           0x103,
    5.49 +    SUBTYPE_RELATED         =           0x104,
    5.50 +    SUBTYPE_REPORT          =           0x105,
    5.51 +    SUBTYPE_SIGNED          =           0x106,
    5.52 +    SUBTYPE_ENCRYPTED       =           0x107,
    5.53 +    SUBTYPE_FORMDATA        =           0x108,
    5.54 +    SUBTYPE_BYTERANGE       =           0x109,
    5.55 +    SUBTYPE_PARTIAL         =           0x10A, /* NOTE(review): partial is likewise a message/ subtype - confirm grouping */
    5.56 +
    5.57 +    /* APPLICATION SUBTYPES WE CARE ABOUT RIGHT NOW */
    5.58 +    SUBTYPE_PGP_ENCRYPTED   =           0x201, /* NOTE(review): this group starts at 0x201, the others at 0x?00 - confirm */
    5.59 +    SUBTYPE_PGP_SIGNATURE   =           0x202,
    5.60 +    SUBTYPE_OCTET_STREAM    =           0x203,
    5.61 +    
    5.62 +    /* TEXT SUBTYPES WE CARE ABOUT RIGHT NOW */
    5.63 +    SUBTYPE_PLAIN           =           0x300,
    5.64 +    SUBTYPE_HTML            =           0x301,
    5.65 +    
    5.66 +    SUBTYPE_OTHER           =           0xfff
    5.67 +} MIME_content_subtype;
    5.68 +
    5.69 +typedef struct _pEpMailMime {
    5.70 +    char* content_id;
    5.71 +    MIME_content_type content_type;
    5.72 +    MIME_content_type content_subtype;
    5.73 +    pEp_mailmime* next;
    5.74 +    pEp_mailmime* first_child;
    5.75 +} pEp_mailmime;
    5.76 +
    5.77 +#endif