/** * @fileOverview This is the main file for the MailParser library to parse raw e-mail data * @author Andris Reinman * @version 0.2.23 */ var Stream = require("stream").Stream, utillib = require("util"), mimelib = require("mimelib"), datetime = require("./datetime"), encodinglib = require("encoding"), Streams = require("./streams"), crypto = require("crypto"), mime = require("mime"); // Expose to the world module.exports.MailParser = MailParser; // MailParser is a FSM - it is always in one of the possible states var STATES = { header: 0x1, body: 0x2, finished: 0x3 }; /** *
Creates instance of MailParser which in turn extends Stream
* *Options object has the following properties:
* *Writes a value to the MailParser stream
* * @param {Buffer|String} chunk The data to be written to the MailParser stream * @param {String} [encoding] The encoding to be used when "chunk" is a string * @returns {Boolean} Returns true */ MailParser.prototype.write = function(chunk, encoding){ if( this._write(chunk, encoding) ){ process.nextTick(this._process.bind(this)); } return true; }; /** *
Terminates the MailParser stream
* *If "chunk" is set, writes it to the Stream before terminating.
* * @param {Buffer|String} chunk The data to be written to the MailParser stream * @param {String} [encoding] The encoding to be used when "chunk" is a string */ MailParser.prototype.end = function(chunk, encoding){ this._write(chunk, encoding); if(this.options.debug && this._remainder){ console.log("REMAINDER: "+this._remainder); } process.nextTick(this._process.bind(this, true)); }; /** *Normalizes CRLF's before writing to the Mailparser stream, does not call `_process`
* * @param {Buffer|String} chunk The data to be written to the MailParser stream * @param {String} [encoding] The encoding to be used when "chunk" is a string * @returns {Boolean} Returns true if writing the chunk was successful */ MailParser.prototype._write = function(chunk, encoding){ if(typeof chunk == "string"){ chunk = new Buffer(chunk, encoding); } chunk = chunk && chunk.toString("binary") || ""; // if the last chunk ended with \r and this one begins // with \n, it's a split line ending. Since the last \r // was already used, skip the \n if(this._lineFeed && chunk.charAt(0) === "\n"){ chunk = chunk.substr(1); } this._lineFeed = chunk.substr(-1) === "\r"; if(chunk && chunk.length){ this._remainder += chunk; return true; } return false; }; /** *
Processes the data written to the MailParser stream
* *The data is split into lines and each line is processed individually. Last * line in the batch is preserved as a remainder since it is probably not a * complete line but just the beginning of it. The remainder is later prepended * to the next batch of data.
* * @param {Boolean} [finalPart=false] if set to true indicates that this is the last part of the stream */ MailParser.prototype._process = function(finalPart){ finalPart = !!finalPart; var lines = this._remainder.split(/\r?\n|\r/), line, i, len; if(!finalPart){ this._remainder = lines.pop(); // force line to 1MB chunks if needed if(this._remainder.length>1048576){ this._remainder = this._remainder.replace(/(.{1048576}(?!\r?\n|\r))/g,"$&\n"); } } for(i=0, len=lines.length; i < len; i++){ line = lines[i]; if(this.options.unescapeSMTP && line.substr(0,2)==".."){ line = line.substr(1); } if(this._isMbox === true && line.match(/^\>+From /)){ line = line.substr(1); } if(this.options.debug){ console.log("LINE " + (++this._lineCounter) + " ("+this._state+"): "+line); } if(this._state == STATES.header){ if(this._processStateHeader(line) === true){ continue; } } if(this._state == STATES.body){ if(this._processStateBody(line) === true){ continue; } } } if(finalPart){ if(this._state == STATES.header && this._remainder){ this._processStateHeader(this._remainder); if(!this._headersSent){ this.emit("headers", this._currentNode.parsedHeaders); this._headersSent = true; } } if(this._currentNode.content || this._currentNode.stream){ this._finalizeContents(); } this._state = STATES.finished; process.nextTick(this._processMimeTree.bind(this)); } }; /** *Processes a line while in header state
* *If header state ends and body starts, detect if the contents is an attachment * and create a stream for it if needed
* * @param {String} line The contents of a line to be processed * @returns {Boolean} If state changes to body retuns true */ MailParser.prototype._processStateHeader = function(line){ var attachment, lastPos = this._currentNode.headers.length - 1, textContent = false, extension; // Check if the header ends and body starts if(!line.length){ if(lastPos>=0){ this._processHeaderLine(lastPos); } if(!this._headersSent){ this.emit("headers", this._currentNode.parsedHeaders); this._headersSent = true; } this._state = STATES.body; // if there's unprocessed header data, do it now if(lastPos >= 0){ this._processHeaderLine(lastPos); } // this is a very simple e-mail, no content type set if(!this._currentNode.parentNode && !this._currentNode.meta.contentType){ this._currentNode.meta.contentType = "text/plain"; } textContent = ["text/plain", "text/html"].indexOf(this._currentNode.meta.contentType || "") >= 0; // detect if this is an attachment or a text node (some agents use inline dispositions for text) if(textContent && (!this._currentNode.meta.contentDisposition || this._currentNode.meta.contentDisposition == "inline")){ this._currentNode.attachment = false; }else if((!textContent || ["attachment", "inline"].indexOf(this._currentNode.meta.contentDisposition)>=0) && !this._currentNode.meta.mimeMultipart){ this._currentNode.attachment = true; } // handle attachment start if(this._currentNode.attachment){ this._currentNode.checksum = crypto.createHash("md5"); this._currentNode.meta.generatedFileName = this._generateFileName(this._currentNode.meta.fileName, this._currentNode.meta.contentType); this._currentNode.meta.contentId = this._currentNode.meta.contentId || crypto.createHash("md5").update(this._currentNode.meta.generatedFileName).digest("hex") + "@mailparser"; extension = this._currentNode.meta.generatedFileName.split(".").pop().toLowerCase(); // Update content-type if it's an application/octet-stream and file extension is available if(this._currentNode.meta.contentType == 
"application/octet-stream" && mime.lookup(extension)){ this._currentNode.meta.contentType = mime.lookup(extension); } attachment = this._currentNode.meta; if(this.options.streamAttachments){ if(this._currentNode.meta.transferEncoding == "base64"){ this._currentNode.stream = new Streams.Base64Stream(); }else if(this._currentNode.meta.transferEncoding == "quoted-printable"){ this._currentNode.stream = new Streams.QPStream("binary"); }else{ this._currentNode.stream = new Streams.BinaryStream(); } attachment.stream = this._currentNode.stream; this.emit("attachment", attachment); }else{ this._currentNode.content = undefined; } } return true; } // unfold header lines if needed if(line.match(/^\s+/) && lastPos>=0){ this._currentNode.headers[lastPos] += " " + line.trim(); }else{ this._currentNode.headers.push(line.trim()); if(lastPos>=0){ // if a complete header line is received, process it this._processHeaderLine(lastPos); } } return false; }; /** *Processes a line while in body state
* * @param {String} line The contents of a line to be processed * @returns {Boolean} If body ends return true */ MailParser.prototype._processStateBody = function(line){ var i, len, node, nodeReady = false; // Handle multipart boundaries if(line.substr(0, 2) == "--"){ for(i=0, len = this._multipartTree.length; iProcesses a complete unfolded header line
* *Processes a line from current node headers array and replaces its value.
* Input string is in the form of "X-Mailer: PHP" and its replacement would be
* an object {key: "x-mailer", value: "PHP"}
Additionally, the current node and its meta object are filled in as well, for example with data from the * To:, From:, Cc: etc. fields.
* * @param {Number} pos Which header element (from an header lines array) should be processed */ MailParser.prototype._processHeaderLine = function(pos){ var key, value, parts, line; pos = pos || 0; if(!(line = this._currentNode.headers[pos]) || typeof line != "string"){ return; } if(!this._headersSent && this._isMbox < 0){ if((this._isMbox = !!line.match(/^From /))){ return; } } parts = line.split(":"); key = parts.shift().toLowerCase().trim(); value = parts.join(":").trim(); switch(key){ case "content-type": this._parseContentType(value); break; case "mime-version": this._currentNode.useMIME = true; break; case "date": this._currentNode.meta.date = new Date(datetime.strtotime(value)*1000 || Date.now()); break; case "to": if(this._currentNode.to && this._currentNode.to.length){ this._currentNode.to = this._currentNode.to.concat(mimelib.parseAddresses(value)); }else{ this._currentNode.to = mimelib.parseAddresses(value); } break; case "from": if(this._currentNode.from && this._currentNode.from.length){ this._currentNode.from = this._currentNode.from.concat(mimelib.parseAddresses(value)); }else{ this._currentNode.from = mimelib.parseAddresses(value); } break; case "reply-to": if(this._currentNode.replyTo && this._currentNode.replyTo.length){ this._currentNode.replyTo = this._currentNode.replyTo.concat(mimelib.parseAddresses(value)); }else{ this._currentNode.replyTo = mimelib.parseAddresses(value); } break; case "cc": if(this._currentNode.cc && this._currentNode.cc.length){ this._currentNode.cc = this._currentNode.cc.concat(mimelib.parseAddresses(value)); }else{ this._currentNode.cc = mimelib.parseAddresses(value); } break; case "bcc": if(this._currentNode.bcc && this._currentNode.bcc.length){ this._currentNode.bcc = this._currentNode.bcc.concat(mimelib.parseAddresses(value)); }else{ this._currentNode.bcc = mimelib.parseAddresses(value); } break; case "x-priority": case "x-msmail-priority": case "importance": value = this._parsePriority(value); 
this._currentNode.priority = value; break; case "message-id": this._currentNode.meta.messageId = this._trimQuotes(value); this._currentNode.messageId = this._currentNode.meta.messageId; break; case "references": this._parseReferences(value); break; case "in-reply-to": this._parseInReplyTo(value); break; case "thread-index": this._currentNode.meta.threadIndex = value; break; case "content-transfer-encoding": this._currentNode.meta.transferEncoding = value.toLowerCase(); break; case "subject": this._currentNode.subject = this._encodeString(value); break; case "content-disposition": this._parseContentDisposition(value); break; case "content-id": this._currentNode.meta.contentId = this._trimQuotes(value); break; } if(this._currentNode.parsedHeaders[key]){ if(!Array.isArray(this._currentNode.parsedHeaders[key])){ this._currentNode.parsedHeaders[key] = [this._currentNode.parsedHeaders[key]]; } this._currentNode.parsedHeaders[key].push(this._replaceMimeWords(value)); }else{ this._currentNode.parsedHeaders[key] = this._replaceMimeWords(value); } this._currentNode.headers[pos] = {key: key, value: value}; }; /** *Creates an empty node element for the mime tree
* *Created element includes parentNode property and a childNodes array. This is * needed to later walk the whole mime tree
* * @param {Object} [parentNode] the parent object for the created node * @returns {Object} node element for the mime tree */ MailParser.prototype._createMimeNode = function(parentNode){ var node = { parentNode: parentNode || this._currentNode || null, headers: [], parsedHeaders:{}, meta: {}, childNodes: [] }; return node; }; /** *Splits a header value into key-value pairs
* *Splits on ;
- the first value will be set as defaultValue
property and will
* not be handled, others will be split on =
to key-value pairs
For example content-type: text/plain; charset=utf-8
will become:
* { * defaultValue: "text/plain", * charset: "utf-8" * } ** * @param {String} value A string to be splitted into key-value pairs * @returns {Object} a key-value object, with defaultvalue property */ MailParser.prototype._parseHeaderLineWithParams = function(value){ var key, parts, returnValue = {}; parts = value.split(";"); returnValue.defaultValue = parts.shift().toLowerCase(); for(var i=0, len = parts.length; i
Fetches additional properties from the content type (charset etc.) and fills * current node meta object with this data
* * @param {String} value Content-Type string * @returns {Object} parsed contenttype object */ MailParser.prototype._parseContentType = function(value){ var fileName; value = this._parseHeaderLineWithParams(value); if(value){ if(value.defaultValue){ value.defaultValue = value.defaultValue.toLowerCase(); this._currentNode.meta.contentType = value.defaultValue; if(value.defaultValue.substr(0,"multipart/".length)=="multipart/"){ this._currentNode.meta.mimeMultipart = value.defaultValue.substr("multipart/".length); } }else{ this._currentNode.meta.contentType = "application/octet-stream"; } if(value.charset){ value.charset = value.charset.toLowerCase(); if(value.charset.substr(0,4)=="win-"){ value.charset = "windows-"+value.charset.substr(4); }else if(value.charset == "ks_c_5601-1987"){ value.charset = "cp949"; }else if(value.charset.match(/^utf\d/)){ value.charset = "utf-"+value.charset.substr(3); }else if(value.charset.match(/^latin[\-_]?\d/)){ value.charset = "iso-8859-"+value.charset.replace(/\D/g,""); }else if(value.charset.match(/^(us\-)?ascii$/)){ value.charset = "utf-8"; } this._currentNode.meta.charset = value.charset; } if(value.format){ this._currentNode.meta.textFormat = value.format.toLowerCase(); } if(value.delsp){ this._currentNode.meta.textDelSp = value.delsp.toLowerCase(); } if(value.boundary){ this._currentNode.meta.mimeBoundary = value.boundary; } if(!this._currentNode.meta.fileName && (fileName = this._detectFilename(value))){ this._currentNode.meta.fileName = fileName; } if(value.boundary){ this._currentNode.meta.mimeBoundary = value.boundary; this._multipartTree.push({ boundary: value.boundary, node: this._currentNode }); } } return value; }; /** *Parses file name from a Content-Type or Content-Disposition field
* *Supports RFC2231 for * folded filenames
* * @param {Object} value Parsed Content-(Type|Disposition) object * @return {String} filename */ MailParser.prototype._detectFilename = function(value){ var fileName="", i=0, parts, encoding, name; if(value.name){ return this._replaceMimeWords(value.name); } if(value.filename){ return this._replaceMimeWords(value.filename); } // RFC2231 if(value["name*"]){ fileName = value["name*"]; }else if(value["filename*"]){ fileName = value["filename*"]; }else if(value["name*0*"]){ while(value["name*"+(i)+"*"]){ fileName += value["name*"+(i++)+"*"]; } }else if(value["filename*0*"]){ while(value["filename*"+(i)+"*"]){ fileName += value["filename*"+(i++)+"*"]; } } if(fileName){ parts = fileName.split("'"); encoding = parts.shift(); name = parts.pop(); if(name){ return this._replaceMimeWords(this._replaceMimeWords("=?"+(encoding || "us-ascii")+"?Q?" + name.replace(/%/g,"=")+"?=")); } } return ""; }; /** *Parses Content-Disposition header field value
* *Fetches filename to current node meta object
* * @param {String} value A Content-Disposition header field */ MailParser.prototype._parseContentDisposition = function(value){ var fileName; value = this._parseHeaderLineWithParams(value); if(value){ if(value.defaultValue){ this._currentNode.meta.contentDisposition = value.defaultValue.trim().toLowerCase(); } if((fileName = this._detectFilename(value))){ this._currentNode.meta.fileName = fileName; } } }; /** *Parses "References" header
* * @param {String} value References header field */ MailParser.prototype._parseReferences = function(value){ this._currentNode.references = (this._currentNode.references || []).concat( (value || "").toString(). trim(). split(/\s+/). map(this._trimQuotes.bind(this)) ); }; /** *Parses "In-Reply-To" header
* * @param {String} value In-Reply-To header field */ MailParser.prototype._parseInReplyTo = function(value){ this._currentNode.inReplyTo = (this._currentNode.inReplyTo || []).concat( (value || "").toString(). trim(). split(/\s+/). map(this._trimQuotes.bind(this)) ); }; /** *Parses the priority of the e-mail
* * @param {String} value The priority value * @returns {String} priority string low|normal|high */ MailParser.prototype._parsePriority = function(value){ value = value.toLowerCase().trim(); if(!isNaN(parseInt(value,10))){ // support "X-Priority: 1 (Highest)" value = parseInt(value, 10) || 0; if(value == 3){ return "normal"; }else if(value > 3){ return "low"; }else{ return "high"; } }else{ switch(value){ case "non-urgent": case "low": return "low"; case "urgent": case "hight": return "high"; } } return "normal"; }; /** *Processes a line in text/html or text/plain node
* *Append the line to the content property
* * @param {String} line A line to be processed */ MailParser.prototype._handleTextLine = function(line){ if(["quoted-printable", "base64"].indexOf(this._currentNode.meta.transferEncoding)>=0 || this._currentNode.meta.textFormat != "flowed"){ if(typeof this._currentNode.content != "string"){ this._currentNode.content = line; }else{ this._currentNode.content += "\n"+line; } }else{ if(typeof this._currentNode.content != "string"){ this._currentNode.content = line; }else if(this._currentNode.content.match(/[ ]$/)){ if(this._currentNode.meta.textFormat == "flowed" && this._currentNode.content.match(/(^|\n)-- $/)){ // handle special case for usenet signatures this._currentNode.content += "\n"+line; }else{ if(this._currentNode.meta.textDelSp == "yes"){ this._currentNode.content = this._currentNode.content.replace(/[ ]+$/,""); } this._currentNode.content += line; } }else{ this._currentNode.content += "\n"+line; } } }; /** *Processes a line in an attachment node
* *If a stream is set up for the attachment write the line to the * stream as a Buffer object, otherwise append it to the content property
* * @param {String} line A line to be processed */ MailParser.prototype._handleAttachmentLine = function(line){ if(!this._currentNode.attachment){ return; } if(this._currentNode.stream){ if(!this._currentNode.streamStarted){ this._currentNode.streamStarted = true; this._currentNode.stream.write(new Buffer(line, "binary")); }else{ this._currentNode.stream.write(new Buffer("\r\n"+line, "binary")); } }else if("content" in this._currentNode){ if(typeof this._currentNode.content!="string"){ this._currentNode.content = line; }else{ this._currentNode.content += "\r\n" + line; } } }; /** *Finalizes a node processing
* *If the node is a text/plain or text/html, convert it to UTF-8 encoded string * If it is an attachment, convert it to a Buffer or if an attachment stream is * set up, close the stream
*/ MailParser.prototype._finalizeContents = function(){ var streamInfo; if(this._currentNode.content){ if(!this._currentNode.attachment){ if(this._currentNode.meta.contentType == "text/html"){ this._currentNode.meta.charset = this._detectHTMLCharset(this._currentNode.content) || this._currentNode.meta.charset || this.options.defaultCharset || "iso-8859-1"; } if(this._currentNode.meta.transferEncoding == "quoted-printable"){ this._currentNode.content = mimelib.decodeQuotedPrintable(this._currentNode.content, false, this._currentNode.meta.charset || this.options.defaultCharset || "iso-8859-1"); if(this._currentNode.meta.textFormat == "flowed"){ if(this._currentNode.meta.textDelSp == "yes"){ this._currentNode.content = this._currentNode.content.replace(/(^|\n)-- \n/g, '$1-- \u0000').replace(/ \n/g, '').replace(/(^|\n)-- \u0000/g, '$1-- \n'); }else{ this._currentNode.content = this._currentNode.content.replace(/(^|\n)-- \n/g, '$1-- \u0000').replace(/ \n/g, ' ').replace(/(^|\n)-- \u0000/g, '$1-- \n'); } } }else if(this._currentNode.meta.transferEncoding == "base64"){ this._currentNode.content = mimelib.decodeBase64(this._currentNode.content, this._currentNode.meta.charset || this.options.defaultCharset || "iso-8859-1"); }else{ this._currentNode.content = this._convertStringToUTF8(this._currentNode.content); } }else{ if(this._currentNode.meta.transferEncoding == "quoted-printable"){ this._currentNode.content = mimelib.decodeQuotedPrintable(this._currentNode.content, false, "binary"); }else if(this._currentNode.meta.transferEncoding == "base64"){ this._currentNode.content = new Buffer(this._currentNode.content, "base64"); }else{ this._currentNode.content = new Buffer(this._currentNode.content, "binary"); } this._currentNode.checksum.update(this._currentNode.content); this._currentNode.meta.checksum = this._currentNode.checksum.digest("hex"); this._currentNode.meta.length = this._currentNode.content.length; } } if(this._currentNode.stream){ streamInfo = 
this._currentNode.stream.end() || {}; if(streamInfo.checksum){ this._currentNode.meta.checksum = streamInfo.checksum; } if(streamInfo.length){ this._currentNode.meta.length = streamInfo.length; } } }; /** *Processes the mime tree
* *Finds text parts and attachments from the tree. If there's several text/plain * or text/html parts, join these into one
* *Emits "end" when finished
*/ MailParser.prototype._processMimeTree = function(){ var returnValue = {}, i, len; this.mailData = {html:[], text:[], attachments:[]}; if(!this.mimeTree.meta.mimeMultipart){ this._processMimeNode(this.mimeTree, 0); }else{ this._walkMimeTree(this.mimeTree); } if(this.mailData.html.length){ for(i=0, len=this.mailData.html.length; iPushes the node into appropriate this.mailData
array (text/html
to this.mailData.html
array etc)
* * @param {Object} htmlNode Original HTML contents node object * @param {String} newHTML HTML text to add to the original object node */ MailParser.prototype._joinHTMLNodes = function(htmlNode, newHTML){ var inserted = false; // process new HTML newHTML = (newHTML || "").toString("utf-8").trim(); // remove doctype from the beginning newHTML = newHTML.replace(/^\s*<\!doctype( [^>]*)?>/gi, ""); // remove
and blocks newHTML = newHTML.replace(/]*)?>(.*)<\/head( [^>]*)?>/gi, ""). replace(/<\/?html( [^>]*)?>/gi, ""). trim(); // keep only text between tags (if ]*)?>(.*)<\/body( [^>]*)?>/gi, function(match, body){ newHTML = body.trim(); }); htmlNode.content = (htmlNode.content || "").toString("utf-8").trim(); htmlNode.content = htmlNode.content.replace(/<\/body( [^>]*)?>/i, function(match){ inserted = true; return "Adds filename placeholder to the HTML if needed
* * @param {Object} htmlNode Original HTML contents node object * @param {String} attachment Attachment meta object */ MailParser.prototype._joinHTMLAttachment = function(htmlNode, attachment){ var inserted = false, fname = attachment.generatedFileName.replace(//g, ">").replace(/"/g, """), newHTML; newHTML = "\n "; htmlNode.content = (htmlNode.content || "").toString("utf-8").trim(); htmlNode.content = htmlNode.content.replace(/<\/body\b[^>]*>/i, function(match){ inserted = true; return "Joins two HTML blocks by removing the header of the added element
* * @param {Sting} htmlNode Original HTML contents * @param {String} newHTML HTML text to add to the original object node * @return {String} Joined HTML */ MailParser.prototype._concatHTML = function(firstNode, secondNode){ var headerNode = "", htmlHeader = ""; firstNode = (firstNode || "").toString("utf-8"); secondNode = (secondNode || "").toString("utf-8"); if(!secondNode){ return firstNode; } if(!firstNode){ return secondNode; } if(firstNode.substr(0, 1024).replace(/\r?\n/g,"\u0000").match(/^[\s\u0000]*(<\!doctype\b[^>]*?>)?[\s\u0000]*<(html|head)\b[^>]*?>/i)){ headerNode = firstNode; }else if(secondNode.substr(0, 1024).replace(/\r?\n/g,"\u0000").match(/^[\s\u0000]*(<\!doctype\b[^>]*?>)?[\s\u0000]*<(html|head)\b[^>]*?>/i)){ headerNode = secondNode; } if(headerNode){ headerNode.replace(/\r?\n/g, "\u0000").replace(/^[\s\u0000]*(<\!doctype\b[^>]*?>)?[\s\u0000]*<(html|head)\b[^>]*>.*?<\/(head)\b[^>]*>(.*?
]*>)?/i, function(h){ var doctype = h.match(/^[\s\u0000]*(<\!doctype\b[^>]*?>)/i), html = h.match(/]*?>/i), head = h.match(/]*?>/i), body = h.match(/]*?>/i); doctype = doctype && doctype[1] && doctype[1] + "\n" || ""; html = html && html[0] || ""; head = head && head[0] || ""; body = body && body[0] || ""; h = h.replace(/<[\!\/]?(doctype|html|head|body)\b[^>]*?>/ig, "\u0000").replace(/\u0000+/g, "\n").trim(); htmlHeader = doctype + html + "\n" + head + (h ? h + "\n" : "") + "\n" + body + "\n"; }); } firstNode = firstNode.replace(/\r?\n/g, "\u0000"). replace(/[\s\u0000]*]*>.*?<\/(head|body)\b[^>]*>/gi, ""). replace(/[\s\u0000]*<[\!\/]?(doctype|html|body)\b[^>]*>[\s\u0000]*/gi, ""). replace(/\u0000/g, "\n"); secondNode = secondNode.replace(/\r?\n/g, "\u0000"). replace(/[\s\u0000]*]*>.*?<\/(head|body)\b[^>]*>/gi, ""). replace(/[\s\u0000]*<[\!\/]?(doctype|html|body)\b[^>]*>[\s\u0000]*/gi, ""). replace(/\u0000/g, "\n"); return htmlHeader + firstNode + secondNode + (htmlHeader? (firstNode || secondNode ? "\n" : "") + "\n" : ""); }; /** *Converts a string from one charset to another
* * @param {Buffer|String} value A String to be converted * @param {String} fromCharset source charset * @param {String} [toCharset="UTF-8"] destination charset * @returns {Buffer} Converted string as a Buffer (or SlowBuffer) */ MailParser.prototype._convertString = function(value, fromCharset, toCharset){ toCharset = (toCharset || "utf-8").toUpperCase(); fromCharset = (fromCharset || "utf-8").toUpperCase(); value = typeof value=="string"?new Buffer(value, "binary"):value; if(toCharset == fromCharset){ return value; } value = encodinglib.convert(value, toCharset, fromCharset); return value; }; /** *Converts a string to UTF-8
* * @param {String} value String to be encoded * @returns {String} UTF-8 encoded string */ MailParser.prototype._convertStringToUTF8 = function(value){ value = this._convertString(value, this._currentNode.meta.charset || this.options.defaultCharset || "iso-8859-1").toString("utf-8"); return value; }; /** *Encodes a header string to UTF-8
* * @param {String} value String to be encoded * @returns {String} UTF-8 encoded string */ MailParser.prototype._encodeString = function(value){ value = this._replaceMimeWords(this._convertStringToUTF8(value)); return value; }; /** *Replaces mime words in a string with UTF-8 encoded strings
* * @param {String} value String to be converted * @returns {String} converted string */ MailParser.prototype._replaceMimeWords = function(value){ return value. replace(/(=\?[^?]+\?[QqBb]\?[^?]+\?=)\s+(?==\?[^?]+\?[QqBb]\?[^?]+\?=)/g, "$1"). // join mimeWords replace(/\=\?[^?]+\?[QqBb]\?[^?]+\?=/g, (function(a){ return mimelib.decodeMimeWord(a.replace(/\s/g,'')); }).bind(this)); }; /** *Removes enclosing quotes ("", '', <>) from a string
* * @param {String} value String to be converted * @returns {String} converted string */ MailParser.prototype._trimQuotes = function(value){ value = (value || "").trim(); if((value.charAt(0)=='"' && value.charAt(value.length-1)=='"') || (value.charAt(0)=="'" && value.charAt(value.length-1)=="'") || (value.charAt(0)=="<" && value.charAt(value.length-1)==">")){ value = value.substr(1,value.length-2); } return value; }; /** *Generates a context unique filename for an attachment
* *If a filename already exists, append a number to it
* *Replaces character set to UTF-8 in HTML <meta> tags
* * @param {String} HTML html contents * @returns {String} updated HTML */ MailParser.prototype._updateHTMLCharset = function(html){ html = html.replace(/\n/g,"\u0000"). replace(/]*>/gi, function(meta){ if(meta.match(/http\-equiv\s*=\s*"?content\-type/i)){ return ''; } if(meta.match(/\scharset\s*=\s*['"]?[\w\-]+["'\s>\/]/i)){ return ''; } return meta; }). replace(/\u0000/g,"\n"); return html; }; /** *Detects the charset of an HTML file
* * @param {String} HTML html contents * @returns {String} Charset for the HTML */ MailParser.prototype._detectHTMLCharset = function(html){ var charset, input, meta; if(typeof html !=" string"){ html = html.toString("ascii"); } if((meta = html.match(/]*?>/i))){ input = meta[0]; } if(input){ charset = input.match(/charset\s?=\s?([a-zA-Z\-_:0-9]*);?/); if(charset){ charset = (charset[1] || "").trim().toLowerCase(); } } if(!charset && (meta = html.match(/