Import a serialized email message in IMF (Internet Message Format). This hzEmail instance is first cleared, then populated with the supplied IMF datum. To be valid, a message must have a sender, at least one recipient, a date and time, and a message id. For the purpose of HadronZoo::Epistula, a message must also have a body. Within the message header there is usually a Content-Type header. This can specify a format such as "text/plain", but only where the message body is not comprised of multiple parts. Email messages are mostly multipart, with a Content-Type of one of the following: (1) multipart/mixed - This is for sending files with different Content-Type header fields, either to be displayed upon opening by the mail client, or as attachments. (2) multipart/alternative - This indicates that each part is an alternative version of the same (or similar) content, e.g. a text part and an HTML part. Each part will have its own Content-Type header to state the format. (3) multipart/related - This is used to indicate that each message part is a component of an aggregate whole. It is for messages consisting of a number of inter-related parts. The message consists of a root part which references other parts, which may in turn reference other parts. One use would be to send a web page complete with images in a single message. The parts are separated by a boundary sequence which is specified in the line after a multipart Content-Type header. In theory a single boundary sequence would suffice, but email messages often use one part to specify another boundary sequence. This latter sequence is then used to separate parts until it is terminated (suffixed with --), at which point use of the previous boundary sequence resumes. Because of this approach, this function calls a recursive support function, _part_process(), to process parts.
| Return Type | Function name | Arguments |
|---|---|---|
| hzEcode | hzEmail::Import | (hzChain&, bool) |
Declared in file: hzMailer.h
Defined in file : hzMailer.cpp
Function Logic:
Function body:
hzEcode	hzEmail::Import	(hzChain& emRaw, bool bHead)
{
	//	Import a serialized email message in IMF (Internet Message Format).
	//
	//	This hzEmail instance is first cleared, then populated with the supplied IMF datum. To be valid, a message must have a sender, at least one recipient, a date and time and a
	//	message id. For the purpose of HadronZoo::Epistula, a message must have a body.
	//
	//	Within the message header, there is usually a Content-Type header. This can specify a format such as "text/plain", but this is only where the message body is not comprised
	//	of multiple parts. Email messages are mostly multipart, with a Content-Type of either:-
	//
	//		multipart/mixed			This is for sending files with different Content-Type header fields, either to be displayed upon opening by the mail client, or as attachments.
	//
	//		multipart/alternative	This indicates that each part is an alternative version of the same (or similar) content, e.g. a text part and an HTML part. Each part will have
	//								its own Content-Type header to state the format.
	//
	//		multipart/related		This is used to indicate that each message part is a component of an aggregate whole. It is for messages consisting of a number of inter-related
	//								parts. The message consists of a root part which references other parts, which may in turn reference other parts. One use would be to send a web
	//								page complete with images in a single message.
	//
	//	The parts are separated by a boundary sequence which is specified in the line after a multipart Content-Type header. In theory a single boundary sequence would suffice, but
	//	email messages often use one part to specify another boundary sequence. This latter sequence is then used to separate parts, until it is terminated (suffixed with --), at
	//	which point use of the previous boundary sequence resumes. Because of this approach, this function calls a recursive support function, _part_process(), to process parts.
	//
	//	Processing is in three stages: (1) the header block is scanned line by line and recognised headers populate the members of this instance, (2) the mandatory headers (date,
	//	from, to, message id) are checked and (3) unless bHead is set, the body is decoded either as a single part or as a series of boundary separated parts. All diagnostics are
	//	accumulated in m_Err, which is written to the thread log if the import fails.
	//
	//	Arguments:	emRaw	The serialized email message supplied as a hzChain
	//				bHead	Limit the import to headers only
	//
	//	Returns:	E_FORMAT	If a recognised header has a malformed or unsupported value
	//				E_SYNTAX	If the header scan fails to advance (the end of a header line cannot be established)
	//				E_BADVALUE	If essential headers (date, from, to, message id) are missing or invalid
	//				E_OK		If the import was successful
	//
	//	Note:	E_NODATA (no message body) is never set directly by this function. Any other code returned is whatever _part_decode() or _part_process() reported.

	_hzfunc("hzEmail::Import") ;

	hzChain		Part ;			//	Accumulates the body of a single-part message
	chIter		zi ;			//	Current position within the raw message
	chIter		loop_stop ;		//	Position at the start of the current header line (used to detect lack of progress)
	chIter		endHdr ;		//	End of the current header line, as set by _find_hdr_end()
	hzEmaddr	emtmp ;			//	For holding email addresses in To, Cc and Bcc
	hzString	mark ;			//	Currently applicable boundary
	hzString	markStart ;		//	Boundary value prepended with '--'
	hzString	markEnd ;		//	Boundary value prepended with '--' and suffixed with '--'
	hzString	strval ;		//	Temp string
	hzString	episId ;		//	Default Epistula ID (if no message id supplied)
	uint32_t	nP ;			//	Part counter
	uint32_t	nLen ;			//	Length of the date as reported by IsFormalDate (0 if not a date)
	hzEcode		rc = E_OK ;		//	Return code

	//	Clear this email message instance and clear error report
	Clear() ;
	m_Err.Clear() ;

	/*
	**	Stage 1: Process the headers
	*/

	zi = emRaw ;
	for (; rc == E_OK && !zi.eof() ;)
	{
		if (*zi == CHAR_CR)
		{
			//	A blank line marks the end of the message header block
			if (zi == "\r\n\r\n")
				{ zi += 4 ; break ; }

			//	Otherwise step over the line terminator of the previous header
			if (zi == "\r\n")
				zi += 2 ;
		}

		//	Discover end of line. Note that \r\n followed by a space or tab, is taken to be a continuation of the line.
		_find_hdr_end(endHdr, zi) ;

		//	Now process the line. Header names are matched with Equiv() throughout (presumed to be a case-insensitive comparison), since mailers vary the case of header names
		//	(e.g. "Message-Id:") and of encoding tokens (e.g. "Base64").
		loop_stop = zi ;

		switch	(*zi)
		{
		case CHAR_LC_B:
		case CHAR_UC_B:
			if (zi.Equiv("Bcc: "))
			{
				zi += 5 ;
				if (_reademaddr(strval, emtmp, zi, endHdr))
					m_BCC.Add(emtmp) ;
				else
					{ rc = E_FORMAT ; m_Err << "Bad BCC address\n" ; }
			}
			break ;

		case CHAR_LC_C:
		case CHAR_UC_C:
			if (zi.Equiv("Cc: "))
			{
				zi += 4 ;
				if (_reademaddr(strval, emtmp, zi, endHdr))
					m_CC.Add(emtmp) ;
				else
					{ rc = E_FORMAT ; m_Err << "Bad CC address\n" ; }
			}
			else if (zi.Equiv("Content-Type: "))
			{
				zi += 14 ;

				if		(zi.Equiv("text/plain;"))				{ zi += 11 ; m_ContType = HZ_CONTENT_TYPE_TEXT_PLAIN ; }
				else if (zi.Equiv("text/html;"))				{ zi += 10 ; m_ContType = HZ_CONTENT_TYPE_TEXT_HTML ; }
				else if (zi.Equiv("multipart/alternative;"))	{ zi += 22 ; m_ContType = HZ_CONTENT_TYPE_MULTI_ALTERNATIVE ; }
				else if (zi.Equiv("multipart/mixed;"))			{ zi += 16 ; m_ContType = HZ_CONTENT_TYPE_MULTI_MIXED ; }
				else if (zi.Equiv("multipart/related;"))		{ zi += 18 ; m_ContType = HZ_CONTENT_TYPE_MULTI_RELATED ; }
				else
				{
					rc = E_FORMAT ;
					m_Err << "Unknown Content-Type\n" ;
					break ;
				}

				if (m_ContType == HZ_CONTENT_TYPE_MULTI_MIXED || m_ContType == HZ_CONTENT_TYPE_MULTI_ALTERNATIVE || m_ContType == HZ_CONTENT_TYPE_MULTI_RELATED)
				{
					//	A multipart message must name exactly one top level boundary
					if (mark)
					{
						rc = E_FORMAT ;
						m_Err << "MULTIPART MIX/ALT/REL: Boundary already specified\n" ;
						break ;
					}

					//	Look for the boundary parameter, but only within this header (continuation lines included). The search must not stray into later headers or the body as
					//	text found there is not a parameter of this Content-Type.
					for (; *zi && !(zi == endHdr) ; zi++)
					{
						if (*zi == CHAR_LC_B)
						{
							if (zi == "boundary=")
							{
								zi += 9 ;
								rc = _readStrval(mark, zi) ;
								if (rc != E_OK)
									m_Err << "Failed to read boundary?\n" ;
								break ;
							}
						}
					}

					if (!mark)
						{ rc = E_FORMAT ; m_Err << "MULTIPART MIX/ALT/REL: Expected a boundary to be specified\n" ; }
				}
			}
			else if (zi.Equiv("Content-Transfer-Encoding: "))
			{
				zi += 27 ;

				//	Note that "binary" is recorded as 8-bit
				if		(zi.Equiv("7bit"))				m_Encoding = HZ_CONTENT_ENCODE_7BIT ;
				else if (zi.Equiv("8bit"))				m_Encoding = HZ_CONTENT_ENCODE_8BIT ;
				else if (zi.Equiv("binary"))			m_Encoding = HZ_CONTENT_ENCODE_8BIT ;
				else if (zi.Equiv("base64"))			m_Encoding = HZ_CONTENT_ENCODE_BASE64 ;
				else if (zi.Equiv("quoted-printable"))	m_Encoding = HZ_CONTENT_ENCODE_QP ;
				else
					{ rc = E_FORMAT ; m_Err << "Unknown Content-Transfer-Encoding value\n" ; }
			}
			break ;

		case CHAR_LC_D:
		case CHAR_UC_D:
			if (zi.Equiv("Date: "))
			{
				zi += 6 ;

				//	A bad date is only reported here. It becomes an error when m_Date is found to be unset after the header scan.
				nLen = IsFormalDate(m_Date, zi) ;
				if (!nLen)
					m_Err << "Date arg must amount to legal date\n" ;
			}
			break ;

		case CHAR_UC_F:
			if (zi.Equiv("From: "))
			{
				zi += 6 ;
				if (!_reademaddr(m_RealFrom, m_AddrFrom, zi, endHdr))
				{
					rc = E_FORMAT ;
					m_Err << "From: arg must amount to an email address\n" ;
				}
			}
			break ;

		case CHAR_UC_M:
			if (zi.Equiv("Message-ID: "))
			{
				zi += 12 ;

				//	Failure is only reported here. A missing id becomes an error after the header scan, unless an Epistula server id is available as a substitute.
				if (!_readangle(m_Id, zi))
					m_Err << "Could not read formal mail id?\n" ;
			}
			break ;

		case CHAR_LC_R:
		case CHAR_UC_R:
			if (zi.Equiv("Return-Path:"))
			{
				zi += 12 ;
				if (!_reademaddr(strval, m_AddrReturn, zi, endHdr))
					{ rc = E_FORMAT ; m_Err << "Return-Path must amount to an email address\n" ; }
			}
			else if (zi.Equiv("Reply-To:"))
			{
				zi += 9 ;
				if (!_reademaddr(m_RealReply, m_AddrReply, zi, endHdr))
					{ rc = E_FORMAT ; m_Err << "Reply-To: arg must amount to an email address\n" ; }
			}
			break ;

		case CHAR_LC_S:
		case CHAR_UC_S:
			if (zi.Equiv("Subject: "))
			{
				zi += 9 ;
				rc = _readStrval(strval, zi) ;
				if (rc != E_OK)
					m_Err << "Issues with subject\n" ;

				//	Keep the raw value, then let CharsetStringDecode() write its result into m_Subject
				m_Subject = strval ;
				CharsetStringDecode(m_Subject, strval) ;
			}
			break ;

		case CHAR_LC_T:
		case CHAR_UC_T:
			if (zi.Equiv("To: "))
			{
				zi += 4 ;

				//	Only record the address if it was actually read. On failure emtmp may still hold an address from an earlier Cc or Bcc header.
				if (_reademaddr(m_RealTo, emtmp, zi, endHdr))
				{
					if (!m_AddrTo)
						m_AddrTo = emtmp ;
					m_Recipients.Add(emtmp) ;
				}
				else
					{ rc = E_FORMAT ; m_Err << "To: arg must amount to an email address\n" ; }
			}
			break ;

		case CHAR_LC_X:
		case CHAR_UC_X:
			if (zi.Equiv("X-Epistula-ServerID: "))
			{
				zi += 21 ;
				if (!_readangle(episId, zi))
					m_Err << "Issues with epis-ID\n" ;
			}
			else if (zi.Equiv("X-Epistula-Ingress: "))
			{
				zi += 20 ;
				if (!_reademaddr(strval, m_AddrRelay, zi, endHdr))
					m_Err << "X-Epistula-Ingress: arg must amount to an email address\n" ;
			}
			break ;
		}

		//	Move to the end of the header line, whether or not the header was recognised. If this leaves us where the line began, no progress is possible so give up rather
		//	than loop forever.
		zi = endHdr ;
		if (zi == loop_stop)
		{
			m_Err << "Loop stop condition\n" ;
			rc = E_SYNTAX ;
			break ;
		}
	}

	if (rc != E_OK)
		goto fail ;

	/*
	**	Stage 2: Check for the mandatory headers. All omissions are reported, not just the first.
	*/

	if (!m_Date)
		{ rc = E_BADVALUE ; m_Err << "WARNING: No date header\n" ; }

	if (!m_AddrFrom)
		{ rc = E_BADVALUE ; m_Err << "No From address\n" ; }

	if (!m_AddrTo)
		{ rc = E_BADVALUE ; m_Err << "No recipients\n" ; }

	//	If there was no Message-ID header, fall back to the Epistula server id
	if (!m_Id)
		m_Id = episId ;
	if (!m_Id)
		{ rc = E_BADVALUE ; m_Err << "No mail id\n" ; }

	if (rc != E_OK)
		goto fail ;

	if (bHead)
		return rc ;

	/*
	**	Stage 3: Process the body. This may or may not come in blocks marked out by the boundary.
	*/

	if (!mark)
	{
		//	The email is comprised of a single part with no boundary. Unless this is stated as HTML it is presumed to be TEXT. The body is gathered up to the end of the
		//	message or up to a line consisting of a lone period, whichever comes first.
		Part.Clear() ;
		for (; !zi.eof() ; zi++)
		{
			if (*zi == CHAR_CR)
			{
				if (zi == "\r\n.\r\n")
					break ;
			}
			Part.AddByte(*zi) ;
		}

		if (m_ContType == HZ_CONTENT_TYPE_TEXT_HTML)
			rc = _part_decode(m_Html, Part, m_Encoding) ;
		else
			rc = _part_decode(m_Text, Part, m_Encoding) ;

		if (rc != E_OK)
			m_Err << "Decode issues\n" ;
		m_Err << "Processed a non-part message\n" ;
		return rc ;
	}

	//	The email is comprised of multiple parts, marked by a boundary. We should be at the start of a boundary but may have to skip whitespace and lines like "This is a multi-part
	//	message in MIME format."
	markStart = "--" + mark ;
	markEnd = markStart + "--" ;
	m_Err.Printf("Operating with BOUNDARY=%s\n", *mark) ;

	if (zi != markStart)
	{
		//	Skip (and report) anything ahead of the first boundary
		m_Err << "Unexpected data\n" ;
		for (; !zi.eof() ; zi++)
		{
			if (zi == markStart)
				break ;
		}
	}

	//	Process parts for as long as we are positioned on an opening boundary. The terminating boundary (markEnd) begins with markStart, so it is tested for inside the loop.
	for (nP = 0 ; rc == E_OK && !zi.eof() && zi == markStart ; nP++)
	{
		if (zi == markEnd)
			break ;

		m_Err.Printf("Processing part %u\n", nP) ;
		rc = _part_process(zi, mark, 1) ;
		if (rc != E_OK)
			m_Err << "Part Process issues\n" ;
	}

	if (rc == E_OK)
		return rc ;

fail:
	//	Report the accumulated diagnostics to the thread log
	threadLog("IMPORT FAILED\n") ;
	threadLog(m_Err) ;
	threadLog("--end--\n") ;
	return rc ;
}