From e410a7c7df3dc194f4dfd09fbee9570d83102627 Mon Sep 17 00:00:00 2001 From: s2 Date: Mon, 13 Nov 2017 15:06:12 +0100 Subject: [PATCH] mailparser --- .npmignore | 3 + .travis.yml | 12 + LICENSE | 16 + README.md | 217 +++++++ lib/datetime.js | 304 ++++++++++ lib/mailparser.js | 1329 ++++++++++++++++++++++++++++++++++++++++++ lib/streams.js | 145 +++++ package.json | 77 +++ test/mailparser.js | 1337 +++++++++++++++++++++++++++++++++++++++++++ test/nested.eml | 63 ++ test/windowsfail.js | 22 + 11 files changed, 3525 insertions(+) create mode 100644 .npmignore create mode 100644 .travis.yml create mode 100644 LICENSE create mode 100644 README.md create mode 100644 lib/datetime.js create mode 100644 lib/mailparser.js create mode 100644 lib/streams.js create mode 100644 package.json create mode 100644 test/mailparser.js create mode 100644 test/nested.eml create mode 100644 test/windowsfail.js diff --git a/.npmignore b/.npmignore new file mode 100644 index 0000000..8d4c2d1 --- /dev/null +++ b/.npmignore @@ -0,0 +1,3 @@ +node_modules +.DS_Store +npm-debug.log \ No newline at end of file diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..c8f68ef --- /dev/null +++ b/.travis.yml @@ -0,0 +1,12 @@ +language: node_js +node_js: + - 0.6 + - 0.8 + - 0.9 + +notifications: + email: + recipients: + - andris@node.ee + on_success: change + on_failure: change diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..a47b0ea --- /dev/null +++ b/LICENSE @@ -0,0 +1,16 @@ +Copyright (c) 2012 Andris Reinman + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +THE SOFTWARE IS 
PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..bcfc69e --- /dev/null +++ b/README.md @@ -0,0 +1,217 @@ +MailParser +========== + +[![Build Status](https://secure.travis-ci.org/andris9/mailparser.png)](http://travis-ci.org/andris9/mailparser) +[![NPM version](https://badge.fury.io/js/mailparser.png)](http://badge.fury.io/js/mailparser) + +**MailParser** is an asynchronous and non-blocking parser for +[node.js](http://nodejs.org) to parse mime encoded e-mail messages. +Handles even large attachments with ease - attachments can be parsed +in chunks and streamed if needed. + +**MailParser** parses raw source of e-mail messages into a structured object. + +No need to worry about charsets or decoding *quoted-printable* or +*base64* data, **MailParser** does all of it for you. All the textual output +from **MailParser** (subject line, addressee names, message body) is always UTF-8. + +For a 25MB e-mail it takes less than a second to parse if attachments are not streamed but buffered and about 3-4 seconds if they are streamed. Expect high RAM usage though if you do not stream the attachments. + +If you want to send e-mail instead of parsing it, check out my other module [Nodemailer](/andris9/Nodemailer). 
+ +## Support mailparser development + +[![Donate to author](https://www.paypalobjects.com/en_US/i/btn/btn_donate_SM.gif)](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=DB26KWR2BQX5W) + +Installation +------------ + + npm install mailparser + +Usage +----- + +Require MailParser module + + var MailParser = require("mailparser").MailParser; + +Create a new MailParser object + + var mailparser = new MailParser([options]); + +Options parameter is an object with the following properties: + + * **debug** - if set to true print all incoming lines to console + * **streamAttachments** - if set to true, stream attachments instead of including them + * **unescapeSMTP** - if set to true replace double dots in the beginning of the file + * **defaultCharset** - the default charset for *text/plain* and *text/html* content, if not set reverts to *Latin-1* + * **showAttachmentLinks** - if set to true, show inlined attachment links `filename` + +MailParser object is a writable Stream - you can pipe directly +files to it or you can send chunks with `mailparser.write` + +When the headers have received, "headers" is emitted. The headers have not been pre-processed (except that mime words have been converted to UTF-8 text). + + mailparser.on("headers", function(headers){ + console.log(headers.received); + }); + +When the parsing ends an `'end'` event is emitted which has an +object with parsed e-mail structure as a parameter. 
+ + mailparser.on("end", function(mail){ + mail; // object structure for parsed e-mail + }); + +### Parsed mail object + + * **headers** - unprocessed headers in the form of - `{key: value}` - if there were multiple fields with the same key then the value is an array + * **from** - an array of parsed `From` addresses - `[{address:'sender@example.com',name:'Sender Name'}]` (should be only one though) + * **to** - an array of parsed `To` addresses + * **cc** - an array of parsed `Cc` addresses + * **bcc** - an array of parsed 'Bcc' addresses + * **subject** - the subject line + * **references** - an array of reference message id values (not set if no reference values present) + * **inReplyTo** - an array of In-Reply-To message id values (not set if no in-reply-to values present) + * **priority** - priority of the e-mail, always one of the following: *normal* (default), *high*, *low* + * **text** - text body + * **html** - html body + * **attachments** - an array of attachments + +### Decode a simple e-mail + +This example decodes an e-mail from a string + + var MailParser = require("mailparser").MailParser, + mailparser = new MailParser(); + + var email = "From: 'Sender Name' \r\n"+ + "To: 'Receiver Name' \r\n"+ + "Subject: Hello world!\r\n"+ + "\r\n"+ + "How are you today?"; + + // setup an event listener when the parsing finishes + mailparser.on("end", function(mail_object){ + console.log("From:", mail_object.from); //[{address:'sender@example.com',name:'Sender Name'}] + console.log("Subject:", mail_object.subject); // Hello world! + console.log("Text body:", mail_object.text); // How are you today? 
+ }); + + // send the email source to the parser + mailparser.write(email); + mailparser.end(); + +### Pipe file to MailParser + +This example pipes a `readableStream` file to **MailParser** + + var MailParser = require("mailparser").MailParser, + mailparser = new MailParser(), + fs = require("fs"); + + mailparser.on("end", function(mail_object){ + console.log("Subject:", mail_object.subject); + }); + + fs.createReadStream("email.eml").pipe(mailparser); + +### Attachments + +By default any attachment found from the e-mail will be included fully in the +final mail structure object as Buffer objects. With large files this might not +be desirable so optionally it is possible to redirect the attachments to a Stream +and keep only the metadata about the file in the mail structure. + + mailparser.on("end", function(mail_object){ + for(var i=0; i + }]; + +The property `generatedFileName` is usually the same as `fileName` but if several +different attachments with the same name exist or there is no `fileName` set, an +unique name is generated. + +Property `content` is always a Buffer object (or SlowBuffer on some occasions) + +#### Attachment streaming + +Attachment streaming can be used when providing an optional options parameter +to the `MailParser` constructor. + + var mp = new MailParser({ + streamAttachments: true + } + +This way there will be no `content` property on final attachment objects +(but the other fields will remain). + +To catch the streams you should listen for `attachment` events on the MailParser +object. The parameter provided includes file information (`contentType`, +`fileName`, `contentId`) and a readable Stream object `stream`. 
+ + var mp = new MailParser({ + streamAttachments: true + } + + mp.on("attachment", function(attachment){ + var output = fs.createWriteStream(attachment.generatedFileName); + attachment.stream.pipe(output); + }); + +`generatedFileName` is unique for the parsed mail - if several attachments with +the same name exist, `generatedFileName` is updated accordingly. Also there +might not be `fileName` parameter at all, so it is better to rely on +`generatedFileName`. + +#### Testing attachment integrity + +Attachment objects include `length` property which is the length of the attachment +in bytes and `checksum` property which is a `md5` hash of the file. + +### Running tests + +Install **MailParser** with dev dependencies + + npm install --dev mailparser + +And then run + + npm test mailparser + +There aren't many tests yet but basics should be covered. + +## Issues + +**S/MIME** + +Currently it is not possible to verify signed content as the incoming text is +split to lines when parsing and line ending characters are not preserved. One +can assume it is always \r\n but this might not be always the case. + +**Seeking** + +Due to the line based parsing it is also not possible to explicitly state +the beginning and ending bytes of the attachments for later source seeking. +Node.js doesn't support the concept of seeking very well anyway. + +## License + +**MIT** diff --git a/lib/datetime.js b/lib/datetime.js new file mode 100644 index 0000000..881055e --- /dev/null +++ b/lib/datetime.js @@ -0,0 +1,304 @@ +/* + * More info at: http://phpjs.org + * + * This is version: 3.18 + * php.js is copyright 2010 Kevin van Zonneveld. 
+ * + * Portions copyright Brett Zamir (http://brett-zamir.me), Kevin van Zonneveld + * (http://kevin.vanzonneveld.net), Onno Marsman, Theriault, Michael White + * (http://getsprink.com), Waldo Malqui Silva, Paulo Freitas, Jonas Raoni + * Soares Silva (http://www.jsfromhell.com), Jack, Philip Peterson, Ates Goral + * (http://magnetiq.com), Legaev Andrey, Ratheous, Alex, Martijn Wieringa, + * Nate, lmeyrick (https://sourceforge.net/projects/bcmath-js/), Philippe + * Baumann, Enrique Gonzalez, Webtoolkit.info (http://www.webtoolkit.info/), + * Ash Searle (http://hexmen.com/blog/), travc, Jani Hartikainen, Carlos R. L. + * Rodrigues (http://www.jsfromhell.com), Ole Vrijenhoek, WebDevHobo + * (http://webdevhobo.blogspot.com/), T.Wild, + * http://stackoverflow.com/questions/57803/how-to-convert-decimal-to-hex-in-javascript, + * pilus, GeekFG (http://geekfg.blogspot.com), Rafał Kukawski + * (http://blog.kukawski.pl), Johnny Mast (http://www.phpvrouwen.nl), Michael + * Grier, Erkekjetter, d3x, marrtins, Andrea Giammarchi + * (http://webreflection.blogspot.com), stag019, mdsjack + * (http://www.mdsjack.bo.it), Chris, Steven Levithan + * (http://blog.stevenlevithan.com), Arpad Ray (mailto:arpad@php.net), David, + * Joris, Tim de Koning (http://www.kingsquare.nl), Marc Palau, Michael White, + * Public Domain (http://www.json.org/json2.js), gettimeofday, felix, Aman + * Gupta, Pellentesque Malesuada, Thunder.m, Tyler Akins (http://rumkin.com), + * Karol Kowalski, Felix Geisendoerfer (http://www.debuggable.com/felix), + * Alfonso Jimenez (http://www.alfonsojimenez.com), Diplom@t + * (http://difane.com/), majak, Mirek Slugen, Mailfaker + * (http://www.weedem.fr/), Breaking Par Consulting Inc + * (http://www.breakingpar.com/bkp/home.nsf/0/87256B280015193F87256CFB006C45F7), + * Josh Fraser + * (http://onlineaspect.com/2007/06/08/auto-detect-a-time-zone-with-javascript/), + * Martin (http://www.erlenwiese.de/), Paul Smith, KELAN, Robin, saulius, AJ, + * Oleg Eremeev, Steve Hilder, 
gorthaur, Kankrelune + * (http://www.webfaktory.info/), Caio Ariede (http://caioariede.com), Lars + * Fischer, Sakimori, Imgen Tata (http://www.myipdf.com/), uestla, Artur + * Tchernychev, Wagner B. Soares, Christoph, nord_ua, class_exists, Der Simon + * (http://innerdom.sourceforge.net/), echo is bad, XoraX + * (http://www.xorax.info), Ozh, Alan C, Taras Bogach, Brad Touesnard, MeEtc + * (http://yass.meetcweb.com), Peter-Paul Koch + * (http://www.quirksmode.org/js/beat.html), T0bsn, Tim Wiel, Bryan Elliott, + * jpfle, JT, Thomas Beaucourt (http://www.webapp.fr), David Randall, Frank + * Forte, Eugene Bulkin (http://doubleaw.com/), noname, kenneth, Hyam Singer + * (http://www.impact-computing.com/), Marco, Raphael (Ao RUDLER), Ole + * Vrijenhoek (http://www.nervous.nl/), David James, Steve Clay, Jason Wong + * (http://carrot.org/), T. Wild, Paul, J A R, LH, strcasecmp, strcmp, JB, + * Daniel Esteban, strftime, madipta, Valentina De Rosa, Marc Jansen, + * Francesco, Stoyan Kyosev (http://www.svest.org/), metjay, Soren Hansen, + * 0m3r, Sanjoy Roy, Shingo, sankai, sowberry, hitwork, Rob, Norman "zEh" + * Fuchs, Subhasis Deb, josh, Yves Sucaet, Ulrich, Scott Baker, ejsanders, + * Nick Callen, Steven Levithan (stevenlevithan.com), Aidan Lister + * (http://aidanlister.com/), Philippe Jausions + * (http://pear.php.net/user/jausions), Zahlii, Denny Wardhana, Oskar Larsson + * Högfeldt (http://oskar-lh.name/), Brian Tafoya + * (http://www.premasolutions.com/), johnrembo, Gilbert, duncan, Thiago Mata + * (http://thiagomata.blog.com), Alexander Ermolaev + * (http://snippets.dzone.com/user/AlexanderErmolaev), Linuxworld, lmeyrick + * (https://sourceforge.net/projects/bcmath-js/this.), Jon Hohle, Pyerre, + * merabi, Saulo Vallory, HKM, ChaosNo1, djmix, Lincoln Ramsay, Adam Wallner + * (http://web2.bitbaro.hu/), paulo kuong, jmweb, Orlando, kilops, dptr1988, + * DxGx, Pedro Tainha (http://www.pedrotainha.com), Bayron Guevara, Le Torbi, + * James, Douglas Crockford 
(http://javascript.crockford.com), Devan + * Penner-Woelk, Jay Klehr, Kheang Hok Chin (http://www.distantia.ca/), Luke + * Smith (http://lucassmith.name), Rival, Amir Habibi + * (http://www.residence-mixte.com/), Blues (http://tech.bluesmoon.info/), Ben + * Bryan, booeyOH, Dreamer, Cagri Ekin, Diogo Resende, Howard Yeend, Pul, + * 3D-GRAF, jakes, Yannoo, Luke Godfrey, daniel airton wermann + * (http://wermann.com.br), Allan Jensen (http://www.winternet.no), Benjamin + * Lupton, davook, Atli Þór, Maximusya, Leslie Hoare, Bug?, setcookie, YUI + * Library: http://developer.yahoo.com/yui/docs/YAHOO.util.DateLocale.html, + * Blues at http://hacks.bluesmoon.info/strftime/strftime.js, Andreas, + * Michael, Christian Doebler, Gabriel Paderni, Marco van Oort, Philipp + * Lenssen, Arnout Kazemier (http://www.3rd-Eden.com), penutbutterjelly, Anton + * Ongson, DtTvB (http://dt.in.th/2008-09-16.string-length-in-bytes.html), + * meo, Greenseed, Yen-Wei Liu, mk.keck, William, rem, Jamie Beck + * (http://www.terabit.ca/), Russell Walker (http://www.nbill.co.uk/), + * Garagoth, Dino, Andrej Pavlovic, gabriel paderni, FGFEmperor, Scott Cariss, + * Slawomir Kaniecki, ReverseSyntax, Mateusz "loonquawl" Zalega, Francois, + * Kirk Strobeck, Billy, vlado houba, Jalal Berrami, date, Itsacon + * (http://www.itsacon.net/), Martin Pool, Pierre-Luc Paour, ger, john + * (http://www.jd-tech.net), mktime, Simon Willison + * (http://simonwillison.net), Nick Kolosov (http://sammy.ru), marc andreu, + * Arno, Nathan, Kristof Coomans (SCK-CEN Belgian Nucleair Research Centre), + * Fox, nobbler, stensi, Matteo, Riddler (http://www.frontierwebdev.com/), + * Tomasz Wesolowski, T.J. 
Leahy, rezna, Eric Nagel, Alexander M Beedie, baris + * ozdil, Greg Frazier, Bobby Drake, Ryan W Tenney (http://ryan.10e.us), Tod + * Gentille, Rafał Kukawski, FremyCompany, Manish, Cord, fearphage + * (http://http/my.opera.com/fearphage/), Victor, Brant Messenger + * (http://www.brantmessenger.com/), Matt Bradley, Luis Salazar + * (http://www.freaky-media.com/), Tim de Koning, taith, Rick Waldron, Mick@el + * + * Dual licensed under the MIT (MIT-LICENSE.txt) + * and GPL (GPL-LICENSE.txt) licenses. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL KEVIN VAN ZONNEVELD BE LIABLE FOR ANY CLAIM, DAMAGES + * OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +this.strtotime = function(str, now) { + // http://kevin.vanzonneveld.net + // + original by: Caio Ariede (http://caioariede.com) + // + improved by: Kevin van Zonneveld (http://kevin.vanzonneveld.net) + // + input by: David + // + improved by: Caio Ariede (http://caioariede.com) + // + improved by: Brett Zamir (http://brett-zamir.me) + // + bugfixed by: Wagner B. 
Soares + // + bugfixed by: Artur Tchernychev + // % note 1: Examples all have a fixed timestamp to prevent tests to fail because of variable time(zones) + // * example 1: strtotime('+1 day', 1129633200); + // * returns 1: 1129719600 + // * example 2: strtotime('+1 week 2 days 4 hours 2 seconds', 1129633200); + // * returns 2: 1130425202 + // * example 3: strtotime('last month', 1129633200); + // * returns 3: 1127041200 + // * example 4: strtotime('2009-05-04 08:30:00'); + // * returns 4: 1241418600 + + var i, match, s, strTmp = '', parse = ''; + + strTmp = str; + strTmp = strTmp.replace(/\s{2,}|^\s|\s$/g, ' '); // unecessary spaces + strTmp = strTmp.replace(/[ \r\n]/g, ''); // unecessary chars + + if (strTmp == 'now') { + return (new Date()).getTime()/1000; // Return seconds, not milli-seconds + } else if (!isNaN(parse = Date.parse(strTmp))) { + return (parse/1000); + } else if (now) { + now = new Date(now*1000); // Accept PHP-style seconds + } else { + now = new Date(); + } + + strTmp = strTmp.toLowerCase(); + + var __is = + { + day: + { + 'sun': 0, + 'mon': 1, + 'tue': 2, + 'wed': 3, + 'thu': 4, + 'fri': 5, + 'sat': 6 + }, + mon: + { + 'jan': 0, + 'feb': 1, + 'mar': 2, + 'apr': 3, + 'may': 4, + 'jun': 5, + 'jul': 6, + 'aug': 7, + 'sep': 8, + 'oct': 9, + 'nov': 10, + 'dec': 11 + } + }; + + var process = function (m) { + var ago = (m[2] && m[2] == 'ago'); + var num = (num = m[0] == 'last' ? -1 : 1) * (ago ? 
-1 : 1); + + switch (m[0]) { + case 'last': + case 'next': + switch (m[1].substring(0, 3)) { + case 'yea': + now.setFullYear(now.getFullYear() + num); + break; + case 'mon': + now.setMonth(now.getMonth() + num); + break; + case 'wee': + now.setDate(now.getDate() + (num * 7)); + break; + case 'day': + now.setDate(now.getDate() + num); + break; + case 'hou': + now.setHours(now.getHours() + num); + break; + case 'min': + now.setMinutes(now.getMinutes() + num); + break; + case 'sec': + now.setSeconds(now.getSeconds() + num); + break; + default: + var day; + if (typeof (day = __is.day[m[1].substring(0, 3)]) != 'undefined') { + var diff = day - now.getDay(); + if (diff == 0) { + diff = 7 * num; + } else if (diff > 0) { + if (m[0] == 'last') {diff -= 7;} + } else { + if (m[0] == 'next') {diff += 7;} + } + now.setDate(now.getDate() + diff); + } + } + break; + + default: + if (/\d+/.test(m[0])) { + num *= parseInt(m[0], 10); + + switch (m[1].substring(0, 3)) { + case 'yea': + now.setFullYear(now.getFullYear() + num); + break; + case 'mon': + now.setMonth(now.getMonth() + num); + break; + case 'wee': + now.setDate(now.getDate() + (num * 7)); + break; + case 'day': + now.setDate(now.getDate() + num); + break; + case 'hou': + now.setHours(now.getHours() + num); + break; + case 'min': + now.setMinutes(now.getMinutes() + num); + break; + case 'sec': + now.setSeconds(now.getSeconds() + num); + break; + } + } else { + return false; + } + break; + } + return true; + }; + + match = strTmp.match(/^(\d{2,4}-\d{2}-\d{2})(?:\s(\d{1,2}:\d{2}(:\d{2})?)?(?:\.(\d+))?)?$/); + if (match != null) { + if (!match[2]) { + match[2] = '00:00:00'; + } else if (!match[3]) { + match[2] += ':00'; + } + + s = match[1].split(/-/g); + + for (i in __is.mon) { + if (__is.mon[i] == s[1] - 1) { + s[1] = i; + } + } + s[0] = parseInt(s[0], 10); + + s[0] = (s[0] >= 0 && s[0] <= 69) ? '20'+(s[0] < 10 ? '0'+s[0] : s[0]+'') : (s[0] >= 70 && s[0] <= 99) ? 
'19'+s[0] : s[0]+''; + return parseInt(this.strtotime(s[2] + ' ' + s[1] + ' ' + s[0] + ' ' + match[2])+(match[4] ? match[4]/1000 : ''), 10); + } + + var regex = '([+-]?\\d+\\s'+ + '(years?|months?|weeks?|days?|hours?|min|minutes?|sec|seconds?'+ + '|sun\\.?|sunday|mon\\.?|monday|tue\\.?|tuesday|wed\\.?|wednesday'+ + '|thu\\.?|thursday|fri\\.?|friday|sat\\.?|saturday)'+ + '|(last|next)\\s'+ + '(years?|months?|weeks?|days?|hours?|min|minutes?|sec|seconds?'+ + '|sun\\.?|sunday|mon\\.?|monday|tue\\.?|tuesday|wed\\.?|wednesday'+ + '|thu\\.?|thursday|fri\\.?|friday|sat\\.?|saturday))'+ + '(\\sago)?'; + + match = strTmp.match(new RegExp(regex, 'gi')); // Brett: seems should be case insensitive per docs, so added 'i' + if (match == null) { + return false; + } + + for (i = 0; i < match.length; i++) { + if (!process(match[i].split(' '))) { + return false; + } + } + + return (now.getTime()/1000); +} \ No newline at end of file diff --git a/lib/mailparser.js b/lib/mailparser.js new file mode 100644 index 0000000..c9d0612 --- /dev/null +++ b/lib/mailparser.js @@ -0,0 +1,1329 @@ + +/** + * @fileOverview This is the main file for the MailParser library to parse raw e-mail data + * @author Andris Reinman + * @version 0.2.23 + */ + +var Stream = require("stream").Stream, + utillib = require("util"), + mimelib = require("mimelib"), + datetime = require("./datetime"), + encodinglib = require("encoding"), + Streams = require("./streams"), + crypto = require("crypto"), + mime = require("mime"); + +// Expose to the world +module.exports.MailParser = MailParser; + +// MailParser is a FSM - it is always in one of the possible states +var STATES = { + header: 0x1, + body: 0x2, + finished: 0x3 +}; + +/** + *

Creates an instance of MailParser, which in turn extends Stream

+ * + *

Options object has the following properties:

+ * + *
    + *
  • debug - if set to true, print all incoming lines to the console
  • + *
  • streamAttachments - if set to true, stream attachments instead of including them
  • + *
  • unescapeSMTP - if set to true, replace double dots at the beginning of a line with a single dot
  • + *
  • defaultCharset - the default charset for text/plain, text/html content, if not set reverts to Latin-1 + *
  • showAttachmentLinks - if set to true, show inlined attachment links + *
+ * + * @constructor + * @param {Object} [options] Optional options object + */ +function MailParser(options){ + + // Make MailParser a Stream object + Stream.call(this); + this.writable = true; + + /** + * Options object + * @public */ this.options = options || {}; + + /** + * Indicates current state the parser is in + * @private */ this._state = STATES.header; + + /** + * The remaining data from the previos chunk which is waiting to be processed + * @private */ this._remainder = ""; + + /** + * The complete tree structure of the e-mail + * @public */ this.mimeTree = this._createMimeNode(); + + /** + * Current node of the multipart mime tree that is being processed + * @private */ this._currentNode = this.mimeTree; + + // default values for the root node + this._currentNode.priority = "normal"; + + /** + * An object of already used attachment filenames + * @private */ this._fileNames = {}; + + /** + * An array of multipart nodes + * @private */ this._multipartTree = []; + + + /** + * This is the final mail structure object that is returned to the client + * @public */ this.mailData = {}; + + /** + * Line counter for debugging + * @private */ this._lineCounter = 0; + + /** + * Did the last chunk end with \r + * @private */ this._lineFeed = false; + + /** + * Is the "headers" event already emitted + * @private */ this._headersSent = false; + + /** + * If the e-mail is in mbox format, unescape ">From " to "From " in body + * @private */ this._isMbox = -1; +} +// inherit methods and properties of Stream +utillib.inherits(MailParser, Stream); + +/** + *

Writes a value to the MailParser stream

+ * + * @param {Buffer|String} chunk The data to be written to the MailParser stream + * @param {String} [encoding] The encoding to be used when "chunk" is a string + * @returns {Boolean} Returns true + */ +MailParser.prototype.write = function(chunk, encoding){ + if( this._write(chunk, encoding) ){ + process.nextTick(this._process.bind(this)); + } + return true; +}; + +/** + *

Terminates the MailParser stream

+ * + *

If "chunk" is set, writes it to the Stream before terminating.

+ * + * @param {Buffer|String} chunk The data to be written to the MailParser stream + * @param {String} [encoding] The encoding to be used when "chunk" is a string + */ +MailParser.prototype.end = function(chunk, encoding){ + this._write(chunk, encoding); + + if(this.options.debug && this._remainder){ + console.log("REMAINDER: "+this._remainder); + } + + process.nextTick(this._process.bind(this, true)); +}; + +/** + *

Appends a chunk to the internal remainder buffer, dropping the leading \n of a CRLF that was split across two chunks; does not call `_process`

+ * + * @param {Buffer|String} chunk The data to be written to the MailParser stream + * @param {String} [encoding] The encoding to be used when "chunk" is a string + * @returns {Boolean} Returns true if writing the chunk was successful + */ +MailParser.prototype._write = function(chunk, encoding){ + if(typeof chunk == "string"){ + chunk = new Buffer(chunk, encoding); + } + + chunk = chunk && chunk.toString("binary") || ""; + + // if the last chunk ended with \r and this one begins + // with \n, it's a split line ending. Since the last \r + // was already used, skip the \n + if(this._lineFeed && chunk.charAt(0) === "\n"){ + chunk = chunk.substr(1); + } + this._lineFeed = chunk.substr(-1) === "\r"; + + if(chunk && chunk.length){ + this._remainder += chunk; + return true; + } + return false; +}; + + +/** + *

Processes the data written to the MailParser stream

+ * + *

The data is split into lines and each line is processed individually. The last + * line in the batch is preserved as a remainder since it is probably not a + * complete line but just the beginning of one. The remainder is later prepended + * to the next batch of data.

+ * + * @param {Boolean} [finalPart=false] if set to true indicates that this is the last part of the stream + */ +MailParser.prototype._process = function(finalPart){ + + finalPart = !!finalPart; + + var lines = this._remainder.split(/\r?\n|\r/), + line, i, len; + + if(!finalPart){ + this._remainder = lines.pop(); + // force line to 1MB chunks if needed + if(this._remainder.length>1048576){ + this._remainder = this._remainder.replace(/(.{1048576}(?!\r?\n|\r))/g,"$&\n"); + } + } + + for(i=0, len=lines.length; i < len; i++){ + line = lines[i]; + + if(this.options.unescapeSMTP && line.substr(0,2)==".."){ + line = line.substr(1); + } + + if(this._isMbox === true && line.match(/^\>+From /)){ + line = line.substr(1); + } + + if(this.options.debug){ + console.log("LINE " + (++this._lineCounter) + " ("+this._state+"): "+line); + } + + if(this._state == STATES.header){ + if(this._processStateHeader(line) === true){ + continue; + } + } + + if(this._state == STATES.body){ + + if(this._processStateBody(line) === true){ + continue; + } + + } + } + + if(finalPart){ + if(this._state == STATES.header && this._remainder){ + this._processStateHeader(this._remainder); + if(!this._headersSent){ + this.emit("headers", this._currentNode.parsedHeaders); + this._headersSent = true; + } + } + if(this._currentNode.content || this._currentNode.stream){ + this._finalizeContents(); + } + this._state = STATES.finished; + process.nextTick(this._processMimeTree.bind(this)); + } + + +}; + +/** + *

Processes a line while in header state

+ * + *

When the header state ends and the body starts, detects whether the content is an attachment + * and creates a stream for it if needed

+ * + * @param {String} line The contents of a line to be processed + * @returns {Boolean} If state changes to body retuns true + */ +MailParser.prototype._processStateHeader = function(line){ + var attachment, lastPos = this._currentNode.headers.length - 1, + textContent = false, extension; + + // Check if the header ends and body starts + if(!line.length){ + if(lastPos>=0){ + this._processHeaderLine(lastPos); + } + if(!this._headersSent){ + this.emit("headers", this._currentNode.parsedHeaders); + this._headersSent = true; + } + + this._state = STATES.body; + + // if there's unprocessed header data, do it now + if(lastPos >= 0){ + this._processHeaderLine(lastPos); + } + + // this is a very simple e-mail, no content type set + if(!this._currentNode.parentNode && !this._currentNode.meta.contentType){ + this._currentNode.meta.contentType = "text/plain"; + } + + textContent = ["text/plain", "text/html"].indexOf(this._currentNode.meta.contentType || "") >= 0; + + // detect if this is an attachment or a text node (some agents use inline dispositions for text) + if(textContent && (!this._currentNode.meta.contentDisposition || this._currentNode.meta.contentDisposition == "inline")){ + this._currentNode.attachment = false; + }else if((!textContent || ["attachment", "inline"].indexOf(this._currentNode.meta.contentDisposition)>=0) && + !this._currentNode.meta.mimeMultipart){ + this._currentNode.attachment = true; + } + + // handle attachment start + if(this._currentNode.attachment){ + + this._currentNode.checksum = crypto.createHash("md5"); + + this._currentNode.meta.generatedFileName = this._generateFileName(this._currentNode.meta.fileName, this._currentNode.meta.contentType); + + this._currentNode.meta.contentId = this._currentNode.meta.contentId || + crypto.createHash("md5").update(this._currentNode.meta.generatedFileName).digest("hex") + "@mailparser"; + + extension = this._currentNode.meta.generatedFileName.split(".").pop().toLowerCase(); + + // Update content-type if 
it's an application/octet-stream and file extension is available + if(this._currentNode.meta.contentType == "application/octet-stream" && mime.lookup(extension)){ + this._currentNode.meta.contentType = mime.lookup(extension); + } + + attachment = this._currentNode.meta; + if(this.options.streamAttachments){ + if(this._currentNode.meta.transferEncoding == "base64"){ + this._currentNode.stream = new Streams.Base64Stream(); + }else if(this._currentNode.meta.transferEncoding == "quoted-printable"){ + this._currentNode.stream = new Streams.QPStream("binary"); + }else{ + this._currentNode.stream = new Streams.BinaryStream(); + } + attachment.stream = this._currentNode.stream; + + this.emit("attachment", attachment); + }else{ + this._currentNode.content = undefined; + } + } + + return true; + } + + // unfold header lines if needed + if(line.match(/^\s+/) && lastPos>=0){ + this._currentNode.headers[lastPos] += " " + line.trim(); + }else{ + this._currentNode.headers.push(line.trim()); + if(lastPos>=0){ + // if a complete header line is received, process it + this._processHeaderLine(lastPos); + } + } + + return false; +}; + +/** + *

Processes a line while in body state

+ * + * @param {String} line The contents of a line to be processed + * @returns {Boolean} If body ends return true + */ +MailParser.prototype._processStateBody = function(line){ + var i, len, node, + nodeReady = false; + + // Handle multipart boundaries + if(line.substr(0, 2) == "--"){ + for(i=0, len = this._multipartTree.length; i=0 && + !this._currentNode.attachment){ + this._handleTextLine(line); + }else if(this._currentNode.attachment){ + this._handleAttachmentLine(line); + } + + return false; +}; + +/** + *

Processes a complete unfolded header line

+ * + *

Processes a line from current node headers array and replaces its value. + * Input string is in the form of "X-Mailer: PHP" and its replacement would be + * an object {key: "x-mailer", value: "PHP"}

+ * + *

Additionally, the node meta object is filled as well, for example with data from + * the To:, From:, Cc: etc. fields.

+ * + * @param {Number} pos Which header element (from an header lines array) should be processed + */ +MailParser.prototype._processHeaderLine = function(pos){ + var key, value, parts, line; + + pos = pos || 0; + + if(!(line = this._currentNode.headers[pos]) || typeof line != "string"){ + return; + } + + if(!this._headersSent && this._isMbox < 0){ + if((this._isMbox = !!line.match(/^From /))){ + return; + } + } + + parts = line.split(":"); + + key = parts.shift().toLowerCase().trim(); + value = parts.join(":").trim(); + + switch(key){ + case "content-type": + this._parseContentType(value); + break; + case "mime-version": + this._currentNode.useMIME = true; + break; + case "date": + this._currentNode.meta.date = new Date(datetime.strtotime(value)*1000 || Date.now()); + break; + case "to": + if(this._currentNode.to && this._currentNode.to.length){ + this._currentNode.to = this._currentNode.to.concat(mimelib.parseAddresses(value)); + }else{ + this._currentNode.to = mimelib.parseAddresses(value); + } + break; + case "from": + if(this._currentNode.from && this._currentNode.from.length){ + this._currentNode.from = this._currentNode.from.concat(mimelib.parseAddresses(value)); + }else{ + this._currentNode.from = mimelib.parseAddresses(value); + } + break; + case "reply-to": + if(this._currentNode.replyTo && this._currentNode.replyTo.length){ + this._currentNode.replyTo = this._currentNode.replyTo.concat(mimelib.parseAddresses(value)); + }else{ + this._currentNode.replyTo = mimelib.parseAddresses(value); + } + break; + case "cc": + if(this._currentNode.cc && this._currentNode.cc.length){ + this._currentNode.cc = this._currentNode.cc.concat(mimelib.parseAddresses(value)); + }else{ + this._currentNode.cc = mimelib.parseAddresses(value); + } + break; + case "bcc": + if(this._currentNode.bcc && this._currentNode.bcc.length){ + this._currentNode.bcc = this._currentNode.bcc.concat(mimelib.parseAddresses(value)); + }else{ + this._currentNode.bcc = mimelib.parseAddresses(value); + 
} + break; + case "x-priority": + case "x-msmail-priority": + case "importance": + value = this._parsePriority(value); + this._currentNode.priority = value; + break; + case "message-id": + this._currentNode.meta.messageId = this._trimQuotes(value); + this._currentNode.messageId = this._currentNode.meta.messageId; + break; + case "references": + this._parseReferences(value); + break; + case "in-reply-to": + this._parseInReplyTo(value); + break; + case "thread-index": + this._currentNode.meta.threadIndex = value; + break; + case "content-transfer-encoding": + this._currentNode.meta.transferEncoding = value.toLowerCase(); + break; + case "subject": + this._currentNode.subject = this._encodeString(value); + break; + case "content-disposition": + this._parseContentDisposition(value); + break; + case "content-id": + this._currentNode.meta.contentId = this._trimQuotes(value); + break; + } + + if(this._currentNode.parsedHeaders[key]){ + if(!Array.isArray(this._currentNode.parsedHeaders[key])){ + this._currentNode.parsedHeaders[key] = [this._currentNode.parsedHeaders[key]]; + } + this._currentNode.parsedHeaders[key].push(this._replaceMimeWords(value)); + }else{ + this._currentNode.parsedHeaders[key] = this._replaceMimeWords(value); + } + + this._currentNode.headers[pos] = {key: key, value: value}; +}; + +/** + *

Creates an empty node element for the mime tree

+ * + *

Created element includes parentNode property and a childNodes array. This is + * needed to later walk the whole mime tree

+ * + * @param {Object} [parentNode] the parent object for the created node + * @returns {Object} node element for the mime tree + */ +MailParser.prototype._createMimeNode = function(parentNode){ + var node = { + parentNode: parentNode || this._currentNode || null, + headers: [], + parsedHeaders:{}, + meta: {}, + childNodes: [] + }; + + return node; +}; + +/** + *

Splits a header value into key-value pairs

+ * + *

Splits on ; - the first value will be set as defaultValue property and will + * not be handled, others will be split on = to key-value pairs

+ * + *

For example content-type: text/plain; charset=utf-8 will become:

+ * + *
+ * {
+ *     defaultValue: "text/plain",
+ *     charset: "utf-8"
+ * }
+ * 
+ * + * @param {String} value A string to be splitted into key-value pairs + * @returns {Object} a key-value object, with defaultvalue property + */ +MailParser.prototype._parseHeaderLineWithParams = function(value){ + var key, parts, returnValue = {}; + + parts = value.split(";"); + returnValue.defaultValue = parts.shift().toLowerCase(); + + for(var i=0, len = parts.length; iParses a Content-Type header field value

+ * + *

Fetches additional properties from the content type (charset etc.) and fills + * current node meta object with this data

+ * + * @param {String} value Content-Type string + * @returns {Object} parsed contenttype object + */ +MailParser.prototype._parseContentType = function(value){ + var fileName; + value = this._parseHeaderLineWithParams(value); + if(value){ + if(value.defaultValue){ + value.defaultValue = value.defaultValue.toLowerCase(); + this._currentNode.meta.contentType = value.defaultValue; + if(value.defaultValue.substr(0,"multipart/".length)=="multipart/"){ + this._currentNode.meta.mimeMultipart = value.defaultValue.substr("multipart/".length); + } + }else{ + this._currentNode.meta.contentType = "application/octet-stream"; + } + if(value.charset){ + value.charset = value.charset.toLowerCase(); + if(value.charset.substr(0,4)=="win-"){ + value.charset = "windows-"+value.charset.substr(4); + }else if(value.charset == "ks_c_5601-1987"){ + value.charset = "cp949"; + }else if(value.charset.match(/^utf\d/)){ + value.charset = "utf-"+value.charset.substr(3); + }else if(value.charset.match(/^latin[\-_]?\d/)){ + value.charset = "iso-8859-"+value.charset.replace(/\D/g,""); + }else if(value.charset.match(/^(us\-)?ascii$/)){ + value.charset = "utf-8"; + } + this._currentNode.meta.charset = value.charset; + } + if(value.format){ + this._currentNode.meta.textFormat = value.format.toLowerCase(); + } + if(value.delsp){ + this._currentNode.meta.textDelSp = value.delsp.toLowerCase(); + } + if(value.boundary){ + this._currentNode.meta.mimeBoundary = value.boundary; + } + + if(!this._currentNode.meta.fileName && (fileName = this._detectFilename(value))){ + this._currentNode.meta.fileName = fileName; + } + + if(value.boundary){ + this._currentNode.meta.mimeBoundary = value.boundary; + this._multipartTree.push({ + boundary: value.boundary, + node: this._currentNode + }); + } + } + return value; +}; + +/** + *

Parses file name from a Content-Type or Content-Disposition field

+ * + *

Supports RFC2231 for + * folded filenames

+ * + * @param {Object} value Parsed Content-(Type|Disposition) object + * @return {String} filename + */ +MailParser.prototype._detectFilename = function(value){ + var fileName="", i=0, parts, encoding, name; + + if(value.name){ + return this._replaceMimeWords(value.name); + } + + if(value.filename){ + return this._replaceMimeWords(value.filename); + } + + // RFC2231 + if(value["name*"]){ + fileName = value["name*"]; + }else if(value["filename*"]){ + fileName = value["filename*"]; + }else if(value["name*0*"]){ + while(value["name*"+(i)+"*"]){ + fileName += value["name*"+(i++)+"*"]; + } + }else if(value["filename*0*"]){ + while(value["filename*"+(i)+"*"]){ + fileName += value["filename*"+(i++)+"*"]; + } + } + + if(fileName){ + parts = fileName.split("'"); + encoding = parts.shift(); + name = parts.pop(); + if(name){ + return this._replaceMimeWords(this._replaceMimeWords("=?"+(encoding || "us-ascii")+"?Q?" + name.replace(/%/g,"=")+"?=")); + } + } + return ""; +}; + +/** + *

Parses Content-Disposition header field value

+ * + *

Fetches filename to current node meta object

+ * + * @param {String} value A Content-Disposition header field + */ +MailParser.prototype._parseContentDisposition = function(value){ + var fileName; + + value = this._parseHeaderLineWithParams(value); + + if(value){ + if(value.defaultValue){ + this._currentNode.meta.contentDisposition = value.defaultValue.trim().toLowerCase(); + } + if((fileName = this._detectFilename(value))){ + this._currentNode.meta.fileName = fileName; + } + } +}; + +/** + *

Parses "References" header

+ * + * @param {String} value References header field + */ +MailParser.prototype._parseReferences = function(value){ + this._currentNode.references = (this._currentNode.references || []).concat( + (value || "").toString(). + trim(). + split(/\s+/). + map(this._trimQuotes.bind(this)) + ); +}; + +/** + *

Parses "In-Reply-To" header

+ * + * @param {String} value In-Reply-To header field + */ +MailParser.prototype._parseInReplyTo = function(value){ + this._currentNode.inReplyTo = (this._currentNode.inReplyTo || []).concat( + (value || "").toString(). + trim(). + split(/\s+/). + map(this._trimQuotes.bind(this)) + ); +}; + +/** + *

Parses the priority of the e-mail

+ * + * @param {String} value The priority value + * @returns {String} priority string low|normal|high + */ +MailParser.prototype._parsePriority = function(value){ + value = value.toLowerCase().trim(); + if(!isNaN(parseInt(value,10))){ // support "X-Priority: 1 (Highest)" + value = parseInt(value, 10) || 0; + if(value == 3){ + return "normal"; + }else if(value > 3){ + return "low"; + }else{ + return "high"; + } + }else{ + switch(value){ + case "non-urgent": + case "low": + return "low"; + case "urgent": + case "hight": + return "high"; + } + } + return "normal"; +}; + +/** + *

Processes a line in text/html or text/plain node

+ * + *

Append the line to the content property

+ * + * @param {String} line A line to be processed + */ +MailParser.prototype._handleTextLine = function(line){ + + if(["quoted-printable", "base64"].indexOf(this._currentNode.meta.transferEncoding)>=0 || this._currentNode.meta.textFormat != "flowed"){ + if(typeof this._currentNode.content != "string"){ + this._currentNode.content = line; + }else{ + this._currentNode.content += "\n"+line; + } + }else{ + if(typeof this._currentNode.content != "string"){ + this._currentNode.content = line; + }else if(this._currentNode.content.match(/[ ]$/)){ + if(this._currentNode.meta.textFormat == "flowed" && this._currentNode.content.match(/(^|\n)-- $/)){ + // handle special case for usenet signatures + this._currentNode.content += "\n"+line; + }else{ + if(this._currentNode.meta.textDelSp == "yes"){ + this._currentNode.content = this._currentNode.content.replace(/[ ]+$/,""); + } + this._currentNode.content += line; + } + }else{ + this._currentNode.content += "\n"+line; + } + } +}; + +/** + *

Processes a line in an attachment node

+ * + *

If a stream is set up for the attachment write the line to the + * stream as a Buffer object, otherwise append it to the content property

+ * + * @param {String} line A line to be processed + */ +MailParser.prototype._handleAttachmentLine = function(line){ + if(!this._currentNode.attachment){ + return; + } + if(this._currentNode.stream){ + if(!this._currentNode.streamStarted){ + this._currentNode.streamStarted = true; + this._currentNode.stream.write(new Buffer(line, "binary")); + }else{ + this._currentNode.stream.write(new Buffer("\r\n"+line, "binary")); + } + }else if("content" in this._currentNode){ + if(typeof this._currentNode.content!="string"){ + this._currentNode.content = line; + }else{ + this._currentNode.content += "\r\n" + line; + } + } +}; + +/** + *

Finalizes a node processing

+ * + *

If the node is text/plain or text/html, convert it to a UTF-8 encoded string. + * If it is an attachment, convert it to a Buffer or, if an attachment stream is + * set up, close the stream.

+ */ +MailParser.prototype._finalizeContents = function(){ + var streamInfo; + + if(this._currentNode.content){ + + if(!this._currentNode.attachment){ + + if(this._currentNode.meta.contentType == "text/html"){ + this._currentNode.meta.charset = this._detectHTMLCharset(this._currentNode.content) || this._currentNode.meta.charset || this.options.defaultCharset || "iso-8859-1"; + } + + if(this._currentNode.meta.transferEncoding == "quoted-printable"){ + this._currentNode.content = mimelib.decodeQuotedPrintable(this._currentNode.content, false, this._currentNode.meta.charset || this.options.defaultCharset || "iso-8859-1"); + if(this._currentNode.meta.textFormat == "flowed"){ + if(this._currentNode.meta.textDelSp == "yes"){ + this._currentNode.content = this._currentNode.content.replace(/(^|\n)-- \n/g, '$1-- \u0000').replace(/ \n/g, '').replace(/(^|\n)-- \u0000/g, '$1-- \n'); + }else{ + this._currentNode.content = this._currentNode.content.replace(/(^|\n)-- \n/g, '$1-- \u0000').replace(/ \n/g, ' ').replace(/(^|\n)-- \u0000/g, '$1-- \n'); + } + } + }else if(this._currentNode.meta.transferEncoding == "base64"){ + this._currentNode.content = mimelib.decodeBase64(this._currentNode.content, this._currentNode.meta.charset || this.options.defaultCharset || "iso-8859-1"); + }else{ + this._currentNode.content = this._convertStringToUTF8(this._currentNode.content); + } + }else{ + if(this._currentNode.meta.transferEncoding == "quoted-printable"){ + this._currentNode.content = mimelib.decodeQuotedPrintable(this._currentNode.content, false, "binary"); + }else if(this._currentNode.meta.transferEncoding == "base64"){ + this._currentNode.content = new Buffer(this._currentNode.content, "base64"); + }else{ + this._currentNode.content = new Buffer(this._currentNode.content, "binary"); + } + this._currentNode.checksum.update(this._currentNode.content); + this._currentNode.meta.checksum = this._currentNode.checksum.digest("hex"); + this._currentNode.meta.length = 
this._currentNode.content.length; + } + + } + + if(this._currentNode.stream){ + streamInfo = this._currentNode.stream.end() || {}; + if(streamInfo.checksum){ + this._currentNode.meta.checksum = streamInfo.checksum; + } + if(streamInfo.length){ + this._currentNode.meta.length = streamInfo.length; + } + } +}; + +/** + *

Processes the mime tree

+ * + *

Finds text parts and attachments in the tree. If there are several text/plain + * or text/html parts, joins them into one.

+ * + *

Emits "end" when finished

+ */ +MailParser.prototype._processMimeTree = function(){ + var returnValue = {}, i, len; + + this.mailData = {html:[], text:[], attachments:[]}; + + if(!this.mimeTree.meta.mimeMultipart){ + this._processMimeNode(this.mimeTree, 0); + }else{ + this._walkMimeTree(this.mimeTree); + } + + if(this.mailData.html.length){ + for(i=0, len=this.mailData.html.length; iWalks the mime tree and runs processMimeNode on each node of the tree

+ * + * @param {Object} node A mime tree node + * @param {Number} [level=0] current depth + */ +MailParser.prototype._walkMimeTree = function(node, level){ + level = level || 1; + for(var i=0, len = node.childNodes.length; iProcesses of a node in the mime tree

+ * + *

Pushes the node into appropriate this.mailData array (text/html to this.mailData.html array etc)

+ * + * @param {Object} node A mime tree node + * @param {Number} [level=0] current depth + * @param {String} mimeMultipart Type of multipart we are dealing with (if any) + */ +MailParser.prototype._processMimeNode = function(node, level, mimeMultipart){ + var i, len; + + level = level || 0; + + if(!node.attachment){ + switch(node.meta.contentType){ + case "text/html": + if(mimeMultipart == "mixed" && this.mailData.html.length){ + for(i=0, len = this.mailData.html.length; iJoins two HTML blocks by removing the header of the added element

+ * + * @param {Object} htmlNode Original HTML contents node object + * @param {String} newHTML HTML text to add to the original object node + */ +MailParser.prototype._joinHTMLNodes = function(htmlNode, newHTML){ + var inserted = false; + + // process new HTML + newHTML = (newHTML || "").toString("utf-8").trim(); + + // remove doctype from the beginning + newHTML = newHTML.replace(/^\s*<\!doctype( [^>]*)?>/gi, ""); + + // remove and blocks + newHTML = newHTML.replace(/]*)?>(.*)<\/head( [^>]*)?>/gi, ""). + replace(/<\/?html( [^>]*)?>/gi, ""). + trim(); + + // keep only text between tags (if ]*)?>(.*)<\/body( [^>]*)?>/gi, function(match, body){ + newHTML = body.trim(); + }); + + htmlNode.content = (htmlNode.content || "").toString("utf-8").trim(); + + htmlNode.content = htmlNode.content.replace(/<\/body( [^>]*)?>/i, function(match){ + inserted = true; + return "
\n" + newHTML + match; + }); + + if(!inserted){ + htmlNode.content += "
\n" + newHTML; + } +}; + +/** + *

Adds filename placeholder to the HTML if needed

+ * + * @param {Object} htmlNode Original HTML contents node object + * @param {String} attachment Attachment meta object + */ +MailParser.prototype._joinHTMLAttachment = function(htmlNode, attachment){ + var inserted = false, + fname = attachment.generatedFileName.replace(//g, ">").replace(/"/g, """), + newHTML; + + newHTML = "\n"; + + htmlNode.content = (htmlNode.content || "").toString("utf-8").trim(); + + htmlNode.content = htmlNode.content.replace(/<\/body\b[^>]*>/i, function(match){ + inserted = true; + return "
\n" + newHTML + match; + }); + + if(!inserted){ + htmlNode.content += "
\n" + newHTML; + } +}; + +/** + *

Joins two HTML blocks by removing the header of the added element

+ * + * @param {Sting} htmlNode Original HTML contents + * @param {String} newHTML HTML text to add to the original object node + * @return {String} Joined HTML + */ +MailParser.prototype._concatHTML = function(firstNode, secondNode){ + var headerNode = "", + htmlHeader = ""; + + firstNode = (firstNode || "").toString("utf-8"); + secondNode = (secondNode || "").toString("utf-8"); + + if(!secondNode){ + return firstNode; + } + if(!firstNode){ + return secondNode; + } + + if(firstNode.substr(0, 1024).replace(/\r?\n/g,"\u0000").match(/^[\s\u0000]*(<\!doctype\b[^>]*?>)?[\s\u0000]*<(html|head)\b[^>]*?>/i)){ + headerNode = firstNode; + }else if(secondNode.substr(0, 1024).replace(/\r?\n/g,"\u0000").match(/^[\s\u0000]*(<\!doctype\b[^>]*?>)?[\s\u0000]*<(html|head)\b[^>]*?>/i)){ + headerNode = secondNode; + } + + if(headerNode){ + headerNode.replace(/\r?\n/g, "\u0000").replace(/^[\s\u0000]*(<\!doctype\b[^>]*?>)?[\s\u0000]*<(html|head)\b[^>]*>.*?<\/(head)\b[^>]*>(.*?]*>)?/i, function(h){ + var doctype = h.match(/^[\s\u0000]*(<\!doctype\b[^>]*?>)/i), + html = h.match(/]*?>/i), + head = h.match(/]*?>/i), + body = h.match(/]*?>/i); + + doctype = doctype && doctype[1] && doctype[1] + "\n" || ""; + html = html && html[0] || ""; + head = head && head[0] || ""; + body = body && body[0] || ""; + h = h.replace(/<[\!\/]?(doctype|html|head|body)\b[^>]*?>/ig, "\u0000").replace(/\u0000+/g, "\n").trim(); + + htmlHeader = doctype + html + "\n" + head + (h ? h + "\n" : "") + "\n" + body + "\n"; + }); + } + + firstNode = firstNode.replace(/\r?\n/g, "\u0000"). + replace(/[\s\u0000]*]*>.*?<\/(head|body)\b[^>]*>/gi, ""). + replace(/[\s\u0000]*<[\!\/]?(doctype|html|body)\b[^>]*>[\s\u0000]*/gi, ""). + replace(/\u0000/g, "\n"); + + secondNode = secondNode.replace(/\r?\n/g, "\u0000"). + replace(/[\s\u0000]*]*>.*?<\/(head|body)\b[^>]*>/gi, ""). + replace(/[\s\u0000]*<[\!\/]?(doctype|html|body)\b[^>]*>[\s\u0000]*/gi, ""). 
+ replace(/\u0000/g, "\n"); + + return htmlHeader + firstNode + secondNode + (htmlHeader? (firstNode || secondNode ? "\n" : "") + "\n" : ""); +}; + +/** + *

Converts a string from one charset to another

+ * + * @param {Buffer|String} value A String to be converted + * @param {String} fromCharset source charset + * @param {String} [toCharset="UTF-8"] destination charset + * @returns {Buffer} Converted string as a Buffer (or SlowBuffer) + */ +MailParser.prototype._convertString = function(value, fromCharset, toCharset){ + toCharset = (toCharset || "utf-8").toUpperCase(); + fromCharset = (fromCharset || "utf-8").toUpperCase(); + + value = typeof value=="string"?new Buffer(value, "binary"):value; + + if(toCharset == fromCharset){ + return value; + } + + value = encodinglib.convert(value, toCharset, fromCharset); + + return value; +}; + +/** + *

Converts a string to UTF-8

+ * + * @param {String} value String to be encoded + * @returns {String} UTF-8 encoded string + */ +MailParser.prototype._convertStringToUTF8 = function(value){ + value = this._convertString(value, this._currentNode.meta.charset || this.options.defaultCharset || "iso-8859-1").toString("utf-8"); + return value; +}; + +/** + *

Encodes a header string to UTF-8

+ * + * @param {String} value String to be encoded + * @returns {String} UTF-8 encoded string + */ +MailParser.prototype._encodeString = function(value){ + value = this._replaceMimeWords(this._convertStringToUTF8(value)); + return value; +}; + +/** + *

Replaces mime words in a string with UTF-8 encoded strings

+ * + * @param {String} value String to be converted + * @returns {String} converted string + */ +MailParser.prototype._replaceMimeWords = function(value){ + return value. + replace(/(=\?[^?]+\?[QqBb]\?[^?]+\?=)\s+(?==\?[^?]+\?[QqBb]\?[^?]+\?=)/g, "$1"). // join mimeWords + replace(/\=\?[^?]+\?[QqBb]\?[^?]+\?=/g, (function(a){ + return mimelib.decodeMimeWord(a.replace(/\s/g,'')); + }).bind(this)); +}; + +/** + *

Removes enclosing quotes ("", '', <>) from a string

+ * + * @param {String} value String to be converted + * @returns {String} converted string + */ +MailParser.prototype._trimQuotes = function(value){ + value = (value || "").trim(); + if((value.charAt(0)=='"' && value.charAt(value.length-1)=='"') || + (value.charAt(0)=="'" && value.charAt(value.length-1)=="'") || + (value.charAt(0)=="<" && value.charAt(value.length-1)==">")){ + value = value.substr(1,value.length-2); + } + return value; +}; + +/** + *

Generates a context unique filename for an attachment

+ * + *

If a filename already exists, append a number to it

+ * + *
    + *
  • file.txt
  • + *
  • file-1.txt
  • + *
  • file-2.txt
  • + *
+ * + * @param {String} fileName source filename + * @param {String} contentType source content type + * @returns {String} generated filename + */ +MailParser.prototype._generateFileName = function(fileName, contentType){ + var ext, defaultExt = "", fileRootName; + + if(contentType){ + defaultExt = mime.extension(contentType); + defaultExt = defaultExt?"."+defaultExt:""; + } + + fileName = fileName || "attachment"+defaultExt; + + // remove path if it is included in the filename + fileName = fileName.toString().split(/[\/\\]+/).pop().replace(/^\.+/,"") || "attachment"; + fileRootName = fileName.replace(/(?:\-\d+)+(\.[^.]*)$/, "$1") || "attachment"; + + if(fileRootName in this._fileNames){ + this._fileNames[fileRootName]++; + ext = fileName.substr((fileName.lastIndexOf(".") || 0)+1); + if(ext == fileName){ + fileName += "-" + this._fileNames[fileRootName]; + }else{ + fileName = fileName.substr(0, fileName.length - ext.length - 1) + "-" + this._fileNames[fileRootName] + "." + ext; + } + }else{ + this._fileNames[fileRootName] = 0; + } + + return fileName; +}; + + +/** + *

Replaces character set to UTF-8 in HTML <meta> tags

+ * + * @param {String} HTML html contents + * @returns {String} updated HTML + */ +MailParser.prototype._updateHTMLCharset = function(html){ + + html = html.replace(/\n/g,"\u0000"). + replace(/]*>/gi, function(meta){ + if(meta.match(/http\-equiv\s*=\s*"?content\-type/i)){ + return ''; + } + if(meta.match(/\scharset\s*=\s*['"]?[\w\-]+["'\s>\/]/i)){ + return ''; + } + return meta; + }). + replace(/\u0000/g,"\n"); + + return html; +}; + +/** + *

Detects the charset of an HTML file

+ * + * @param {String} HTML html contents + * @returns {String} Charset for the HTML + */ +MailParser.prototype._detectHTMLCharset = function(html){ + var charset, input, meta; + + if(typeof html !=" string"){ + html = html.toString("ascii"); + } + + if((meta = html.match(/]*?>/i))){ + input = meta[0]; + } + + if(input){ + charset = input.match(/charset\s?=\s?([a-zA-Z\-_:0-9]*);?/); + if(charset){ + charset = (charset[1] || "").trim().toLowerCase(); + } + } + + if(!charset && (meta = html.match(/=0.1.4", + "iconv": "*", + "mime": "*", + "mimelib": ">=0.2.6" + }, + "deprecated": false, + "description": "Asynchronous and non-blocking parser for mime encoded e-mail messages", + "devDependencies": { + "nodeunit": "*" + }, + "engine": { + "node": ">=0.4" + }, + "homepage": "https://github.com/andris9/mailparser#readme", + "keywords": [ + "e-mail", + "mime", + "parser" + ], + "licenses": [ + { + "type": "MIT", + "url": "http://github.com/andris9/mailparser/blob/master/LICENSE" + } + ], + "main": "./lib/mailparser", + "maintainers": [ + { + "name": "andris", + "email": "andris@node.ee" + } + ], + "name": "mailparser", + "optionalDependencies": { + "iconv": "*" + }, + "repository": { + "type": "git", + "url": "git+ssh://git@github.com/andris9/mailparser.git" + }, + "scripts": { + "test": "nodeunit test/" + }, + "version": "0.3.6" +} diff --git a/test/mailparser.js b/test/mailparser.js new file mode 100644 index 0000000..7fc6b78 --- /dev/null +++ b/test/mailparser.js @@ -0,0 +1,1337 @@ +var MailParser = require("../lib/mailparser").MailParser, + testCase = require('nodeunit').testCase, + utillib = require("util"), + encodinglib = require("encoding"), + fs = require("fs"); + +exports["General tests"] = { + "Many chunks": function(test){ + var encodedText = "Content-Type: text/plain; charset=utf-8\r\n" + + "\r\n" + + "ÕÄ\r\n" + + "ÖÜ", // \r\nÕÄÖÜ + mail = new Buffer(encodedText, "utf-8"); + + test.expect(1); + var mailparser = new MailParser(); + + for(var i=0, len = 
mail.length; i\n" + + "\r" + + "1234", + mail = new Buffer(encodedText, "utf-8"); + + test.expect(1); + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.deepEqual(mail.references, ["mail1"]); + test.done(); + }); + }, + + "Multiple reference values": function(test){ + var encodedText = "Content-type: text/plain\r" + + "References: \n" + + " \n" + + "\r" + + "1234", + mail = new Buffer(encodedText, "utf-8"); + + test.expect(1); + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.deepEqual(mail.references, ["mail1", "mail2", "mail3"]); + test.done(); + }); + }, + + "Multiple reference fields": function(test){ + var encodedText = "Content-type: text/plain\r" + + "References: \n" + + "References: \n" + + "\r" + + "1234", + mail = new Buffer(encodedText, "utf-8"); + + test.expect(1); + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.deepEqual(mail.references, ["mail1", "mail3"]); + test.done(); + }); + }, + + "Single in-reply-to": function(test){ + var encodedText = "Content-type: text/plain\r" + + "in-reply-to: \n" + + "\r" + + "1234", + mail = new Buffer(encodedText, "utf-8"); + + test.expect(1); + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.deepEqual(mail.inReplyTo, ["mail1"]); + test.done(); + }); + }, + + "Multiple in-reply-to values": function(test){ + var encodedText = "Content-type: text/plain\r" + + "in-reply-to: \n" + + " \n" + + "\r" + + "1234", + mail = new Buffer(encodedText, "utf-8"); + + test.expect(1); + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.deepEqual(mail.inReplyTo, ["mail1", "mail2", "mail3"]); + test.done(); + }); + }, + + "Multiple in-reply-to fields": function(test){ + var encodedText = "Content-type: text/plain\r" + + "in-reply-to: \n" + + "in-reply-to: \n" 
+ + "\r" + + "1234", + mail = new Buffer(encodedText, "utf-8"); + + test.expect(1); + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.deepEqual(mail.inReplyTo, ["mail1", "mail3"]); + test.done(); + }); + }, + + "Reply To address": function(test){ + var encodedText = "Reply-TO: andris \r" + + "Subject: ÕÄÖÜ\n" + + "\r" + + "1234", + mail = new Buffer(encodedText, "utf-8"); + + test.expect(1); + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.deepEqual(mail.replyTo, [{name:"andris", address:"andris@disposebox.com"}]); + test.done(); + }); + } + +}; + +exports["Text encodings"] = { + + "Plaintext encoding: Default": function(test){ + var encodedText = [13,10, 213, 196, 214, 220], // \r\nÕÄÖÜ + mail = new Buffer(encodedText); + + test.expect(1); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.text, "ÕÄÖÜ"); + test.done(); + }); + }, + + "Plaintext encoding: Header defined": function(test){ + var encodedText = "Content-Type: TEXT/PLAIN; CHARSET=UTF-8\r\n" + + "\r\n" + + "ÕÄÖÜ", + mail = new Buffer(encodedText, "utf-8"); + + test.expect(1); + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.text, "ÕÄÖÜ"); + test.done(); + }); + }, + + "HTML encoding: From ": function(test){ + var encodedText = "Content-Type: text/html\r\n" + + "\r\n" + + "ÕÄÖÜ", + mail = new Buffer(encodedText, "utf-8"); + + test.expect(1); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal((mail.html || "").substr(-4), "ÕÄÖÜ"); + test.done(); + }); + }, + + "HTML encoding: Conflicting headers": function(test){ + var encodedText = "Content-Type: text/html; charset=iso-8859-1\r\n" + + "\r\n" + + "ÕÄÖÜ", + mail = new Buffer(encodedText, "utf-8"); + + test.expect(1); + + var mailparser = new 
MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal((mail.html || "").substr(-4), "ÕÄÖÜ"); + test.done(); + }); + }, + "HTML encoding: Header defined": function(test){ + var encodedText = "Content-Type: text/html; charset=iso-UTF-8\r\n"+ + "\r\n"+ + "ÕÄÖÜ", + mail = new Buffer(encodedText, "utf-8"); + + test.expect(1); + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.html, "ÕÄÖÜ"); + test.done(); + }); + }, + "Mime Words": function(test){ + var encodedText = "Content-type: text/plain; charset=utf-8\r\n" + + "Subject: =?iso-8859-1?Q?Avaldu?= =?iso-8859-1?Q?s_lepingu_?=\r\n =?iso-8859-1?Q?l=F5petamise?= =?iso-8859-1?Q?ks?=\r\n", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.subject, "Avaldus lepingu lõpetamiseks"); + test.done(); + }); + } +}; + +exports["Binary attachment encodings"] = { + "Quoted-Printable": function(test){ + var encodedText = "Content-Type: application/octet-stream\r\n"+ + "Content-Transfer-Encoding: QUOTED-PRINTABLE\r\n"+ + "\r\n"+ + "=00=01=02=03=FD=FE=FF", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(Array.prototype.slice.apply(mail.attachments && mail.attachments[0] && mail.attachments[0].content && mail.attachments[0].content || []).join(","), "0,1,2,3,253,254,255"); + test.done(); + }); + }, + "Base64": function(test){ + var encodedText = "Content-Type: application/octet-stream\r\n"+ + "Content-Transfer-Encoding: base64\r\n"+ + "\r\n"+ + "AAECA/3+/w==", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(Array.prototype.slice.apply(mail.attachments && mail.attachments[0] && mail.attachments[0].content && 
mail.attachments[0].content || []).join(","), "0,1,2,3,253,254,255"); + test.done(); + }); + }, + "8bit": function(test){ + var encodedText = "Content-Type: application/octet-stream\r\n"+ + "\r\n"+ + "ÕÄÖÜ", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(Array.prototype.slice.apply(mail.attachments && mail.attachments[0] && mail.attachments[0].content && mail.attachments[0].content || []).join(","), "195,149,195,132,195,150,195,156"); + test.done(); + }); + } + +}; + +exports["Attachment Content-Id"] = { + "Default": function(test){ + var encodedText = "Content-Type: application/octet-stream\r\n"+ + "Content-Transfer-Encoding: QUOTED-PRINTABLE\r\n"+ + "Content-Disposition: attachment; filename=\"=?UTF-8?Q?=C3=95=C3=84=C3=96=C3=9C?=\"\r\n"+ + "\r\n"+ + "=00=01=02=03=FD=FE=FF", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.attachments && mail.attachments[0] && mail.attachments[0].contentId, "ef694232fea1c01c16fb8a03a0ca710c@mailparser"); + test.done(); + }); + }, + + "Defined": function(test){ + var encodedText = "Content-Type: application/octet-stream\r\n"+ + "Content-Transfer-Encoding: QUOTED-PRINTABLE\r\n"+ + "Content-Disposition: attachment; filename=\"=?UTF-8?Q?=C3=95=C3=84=C3=96=C3=9C?=\"\r\n"+ + "Content-Id: test@localhost\r\n"+ + "\r\n"+ + "=00=01=02=03=FD=FE=FF", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.attachments && mail.attachments[0] && mail.attachments[0].contentId, "test@localhost"); + test.done(); + }); + } +}; + +exports["Attachment filename"] = { + + "Content-Disposition filename": function(test){ + var encodedText = "Content-Type: application/octet-stream\r\n"+ + "Content-Transfer-Encoding: 
QUOTED-PRINTABLE\r\n"+ + "Content-Disposition: attachment; filename=\"=?UTF-8?Q?=C3=95=C3=84=C3=96=C3=9C?=\"\r\n"+ + "\r\n"+ + "=00=01=02=03=FD=FE=FF", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.attachments && mail.attachments[0] && mail.attachments[0].content && mail.attachments[0].fileName, "ÕÄÖÜ"); + test.done(); + }); + }, + "Content-Disposition filename*": function(test){ + var encodedText = "Content-Type: application/octet-stream\r\n"+ + "Content-Transfer-Encoding: QUOTED-PRINTABLE\r\n"+ + "Content-Disposition: attachment; filename*=\"UTF-8''%C3%95%C3%84%C3%96%C3%9C\"\r\n"+ + "\r\n"+ + "=00=01=02=03=FD=FE=FF", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.attachments && mail.attachments[0] && mail.attachments[0].content && mail.attachments[0].fileName, "ÕÄÖÜ"); + test.done(); + }); + }, + "Content-Disposition filename*X*": function(test){ + var encodedText = "Content-Type: application/octet-stream\r\n"+ + "Content-Transfer-Encoding: QUOTED-PRINTABLE\r\n"+ + "Content-Disposition: attachment;\r\n"+ + " filename*0*=UTF-8''%C3%95%C3%84;\r\n"+ + " filename*1*=%C3%96%C3%9C\r\n"+ + "\r\n"+ + "=00=01=02=03=FD=FE=FF", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.attachments && mail.attachments[0] && mail.attachments[0].content && mail.attachments[0].fileName, "ÕÄÖÜ"); + test.done(); + }); + }, + + "Content-Type name": function(test){ + var encodedText = "Content-Type: application/octet-stream; name=\"=?UTF-8?Q?=C3=95=C3=84=C3=96=C3=9C?=\"\r\n"+ + "Content-Transfer-Encoding: QUOTED-PRINTABLE\r\n"+ + "\r\n"+ + "=00=01=02=03=FD=FE=FF", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + 
mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.attachments && mail.attachments[0] && mail.attachments[0].content && mail.attachments[0].fileName, "ÕÄÖÜ"); + test.done(); + }); + }, + "Content-Type name*": function(test){ + var encodedText = "Content-Type: application/octet-stream;\r\n"+ + " name*=UTF-8''%C3%95%C3%84%C3%96%C3%9C\r\n"+ + "Content-Transfer-Encoding: QUOTED-PRINTABLE\r\n"+ + "\r\n"+ + "=00=01=02=03=FD=FE=FF", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.attachments && mail.attachments[0] && mail.attachments[0].content && mail.attachments[0].fileName, "ÕÄÖÜ"); + test.done(); + }); + }, + "Content-Type name*X*": function(test){ + var encodedText = "Content-Type: application/octet-stream;\r\n"+ + " name*0*=UTF-8''%C3%95%C3%84;\r\n"+ + " name*1*=%C3%96%C3%9C\r\n"+ + "Content-Transfer-Encoding: QUOTED-PRINTABLE\r\n"+ + "\r\n"+ + "=00=01=02=03=FD=FE=FF", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.attachments && mail.attachments[0] && mail.attachments[0].content && mail.attachments[0].fileName, "ÕÄÖÜ"); + test.done(); + }); + }, + "Default name from Content-type": function(test){ + var encodedText = "Content-Type: application/pdf\r\n"+ + "Content-Transfer-Encoding: QUOTED-PRINTABLE\r\n"+ + "\r\n"+ + "=00=01=02=03=FD=FE=FF", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.attachments && mail.attachments[0] && mail.attachments[0].content && mail.attachments[0].generatedFileName, "attachment.pdf"); + test.done(); + }); + }, + "Default name": function(test){ + var encodedText = "Content-Type: application/octet-stream\r\n"+ + "Content-Transfer-Encoding: QUOTED-PRINTABLE\r\n"+ + "\r\n"+ 
+ "=00=01=02=03=FD=FE=FF", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.attachments && mail.attachments[0] && mail.attachments[0].content && mail.attachments[0].generatedFileName, "attachment.bin"); + test.done(); + }); + }, + "Multiple filenames - Same": function(test){ + var encodedText = "Content-Type: multipart/mixed; boundary=ABC\r\n"+ + "\r\n"+ + "--ABC\r\n"+ + "Content-Type: application/octet-stream; name=\"test.txt\"\r\n"+ + "\r\n"+ + "=00=01=02=03=FD=FE=FF\r\n"+ + "--ABC\r\n"+ + "Content-Type: application/octet-stream; name=\"test.txt\"\r\n"+ + "\r\n"+ + "=00=01=02=03=FD=FE=FF\r\n"+ + "--ABC--", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.attachments && mail.attachments[0] && mail.attachments[0].content && mail.attachments[0].generatedFileName, "test.txt"); + test.equal(mail.attachments && mail.attachments[1] && mail.attachments[1].content && mail.attachments[1].generatedFileName, "test-1.txt"); + test.done(); + }); + }, + "Multiple filenames - Different": function(test){ + var encodedText = "Content-Type: multipart/mixed; boundary=ABC\r\n"+ + "\r\n"+ + "--ABC\r\n"+ + "Content-Type: application/octet-stream\r\n"+ + "\r\n"+ + "=00=01=02=03=FD=FE=FF\r\n"+ + "--ABC\r\n"+ + "Content-Type: application/octet-stream; name=\"test.txt\"\r\n"+ + "\r\n"+ + "=00=01=02=03=FD=FE=FF\r\n"+ + "--ABC--", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.attachments && mail.attachments[0] && mail.attachments[0].content && mail.attachments[0].generatedFileName, "attachment.bin"); + test.equal(mail.attachments && mail.attachments[1] && mail.attachments[1].content && mail.attachments[1].generatedFileName, "test.txt"); + test.done(); + 
}); + }, + "Multiple filenames - with number": function(test){ + var encodedText = "Content-Type: multipart/mixed; boundary=ABC\r\n"+ + "\r\n"+ + "--ABC\r\n"+ + "Content-Type: application/octet-stream; name=\"somename.txt\"\r\n"+ + "\r\n"+ + "=00=01=02=03=FD=FE=FF\r\n"+ + "--ABC\r\n"+ + "Content-Type: application/octet-stream; name=\"somename-1.txt\"\r\n"+ + "\r\n"+ + "=00=01=02=03=FD=FE=FF\r\n"+ + "--ABC\r\n"+ + "Content-Type: application/octet-stream; name=\"somename.txt\"\r\n"+ + "\r\n"+ + "=00=01=02=03=FD=FE=FF\r\n"+ + "--ABC\r\n"+ + "Content-Type: application/octet-stream; name=\"somename-1-1.txt\"\r\n"+ + "\r\n"+ + "=00=01=02=03=FD=FE=FF\r\n"+ + "--ABC--", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.attachments && mail.attachments[0] && mail.attachments[0].content && mail.attachments[0].generatedFileName, "somename.txt"); + test.equal(mail.attachments && mail.attachments[1] && mail.attachments[1].content && mail.attachments[1].generatedFileName, "somename-1-1.txt"); + test.equal(mail.attachments && mail.attachments[2] && mail.attachments[2].content && mail.attachments[2].generatedFileName, "somename-2.txt"); + test.equal(mail.attachments && mail.attachments[3] && mail.attachments[3].content && mail.attachments[3].generatedFileName, "somename-1-1-3.txt"); + test.done(); + }); + }, + "Generate filename from Content-Type": function(test){ + var encodedText = "Content-Type: multipart/mixed; boundary=ABC\r\n"+ + "\r\n"+ + "--ABC\r\n"+ + "Content-Type: application/pdf\r\n"+ + "\r\n"+ + "=00=01=02=03=FD=FE=FF\r\n"+ + "--ABC--", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.attachments && mail.attachments[0] && mail.attachments[0].content && mail.attachments[0].generatedFileName, "attachment.pdf"); + test.done(); + }); + } + +}; + 
+exports["Plaintext format"] = { + "Default": function(test){ + var encodedText = "Content-Type: text/plain;\r\n\r\nFirst line \r\ncontinued", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.text, "First line \ncontinued"); + test.done(); + }); + }, + "Flowed": function(test){ + var encodedText = "Content-Type: text/plain; format=flowed\r\n\r\nFirst line \r\ncontinued \r\nand so on", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.text, "First line continued and so on"); + test.done(); + }); + }, + "Flowed Signature": function(test){ + var encodedText = "Content-Type: text/plain; format=flowed\r\n\r\nHow are you today?\r\n"+ + "-- \r\n"+ + "Signature\r\n", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.text, "How are you today?\n-- \nSignature\n"); + test.done(); + }); + }, + "Fixed": function(test){ + var encodedText = "Content-Type: text/plain; format=fixed\r\n\r\nFirst line \r\ncontinued \r\nand so on", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.text, "First line \ncontinued \nand so on"); + test.done(); + }); + }, + "DelSp": function(test){ + var encodedText = "Content-Type: text/plain; format=flowed; delsp=yes\r\n\r\nFirst line \r\ncontinued \r\nand so on", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.text, "First linecontinuedand so on"); + test.done(); + }); + }, + "Quoted printable, Flowed": function(test){ + var encodedText = "Content-Type: text/plain; 
format=flowed\r\nContent-Transfer-Encoding: QUOTED-PRINTABLE\r\n\r\nFoo =\n\nBar =\n\nBaz", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.text, "Foo Bar Baz"); + test.done(); + }); + }, + "Quoted printable, Flowed Signature": function(test){ + var encodedText = "Content-Type: text/plain; format=flowed\r\nContent-Transfer-Encoding: QUOTED-PRINTABLE\r\n\r\nHow are you today?\r\n"+ + "-- \r\n"+ + "Signature\r\n", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.text, "How are you today?\n-- \nSignature\n"); + test.done(); + }); + }, + "Quoted printable, DelSp": function(test){ + var encodedText = "Content-Type: text/plain; format=flowed; delsp=yes\r\nContent-Transfer-Encoding: QUOTED-PRINTABLE\r\n\r\nFoo =\n\nBar =\n\nBaz", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.text, "FooBarBaz"); + test.done(); + }); + } +}; + +exports["Transfer encoding"] = { + "Quoted-Printable Default charset": function(test){ + var encodedText = "Content-type: text/plain\r\nContent-Transfer-Encoding: quoted-printable\r\n\r\n=D5=C4=D6=DC", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.text, "ÕÄÖÜ"); + test.done(); + }); + }, + "Quoted-Printable UTF-8": function(test){ + var encodedText = "Content-type: text/plain; charset=utf-8\r\nContent-Transfer-Encoding: QUOTED-PRINTABLE\r\n\r\n=C3=95=C3=84=C3=96=C3=9C", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.text, "ÕÄÖÜ"); + test.done(); + }); + }, + "Base64 Default 
charset": function(test){ + var encodedText = "Content-type: text/plain\r\nContent-Transfer-Encoding: bAse64\r\n\r\n1cTW3A==", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.text, "ÕÄÖÜ"); + test.done(); + }); + }, + "Base64 UTF-8": function(test){ + var encodedText = "Content-type: text/plain; charset=utf-8\r\nContent-Transfer-Encoding: bAse64\r\n\r\nw5XDhMOWw5w=", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.text, "ÕÄÖÜ"); + test.done(); + }); + }, + "Mime Words": function(test){ + var encodedText = "Content-type: text/plain; charset=utf-8\r\nSubject: =?iso-8859-1?Q?Avaldu?= =?iso-8859-1?Q?s_lepingu_?=\r\n =?iso-8859-1?Q?l=F5petamise?= =?iso-8859-1?Q?ks?=\r\n", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.subject, "Avaldus lepingu lõpetamiseks"); + test.done(); + }); + }, + "Mime Words with invalid linebreaks (Sparrow)": function(test){ + var encodedText = "Content-type: text/plain; charset=utf-8\r\n" + + "Subject: abc=?utf-8?Q?=C3=B6=C\r\n"+ + " 3=B5=C3=BC?=", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.subject, "abcöõü"); + test.done(); + }); + }, + "8bit Default charset": function(test){ + var encodedText = "Content-type: text/plain\r\nContent-Transfer-Encoding: 8bit\r\n\r\nÕÄÖÜ", + textmap = encodedText.split('').map(function(chr){return chr.charCodeAt(0);}), + mail = new Buffer(textmap); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.text, "ÕÄÖÜ"); + test.done(); + }); + }, + "8bit UTF-8": function(test){ + var encodedText = 
"Content-type: text/plain; charset=utf-8\r\nContent-Transfer-Encoding: 8bit\r\n\r\nÕÄÖÜ", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.text, "ÕÄÖÜ"); + test.done(); + }); + }, + "Invalid Quoted-Printable": function(test){ + var encodedText = "Content-type: text/plain; charset=utf-8\r\nContent-Transfer-Encoding: QUOTED-PRINTABLE\r\n\r\n==C3==95=C3=84=C3=96=C3=9C=", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.text, "=�=�ÄÖÜ"); + test.done(); + }); + }, + "Invalid BASE64": function(test){ + var encodedText = "Content-type: text/plain; charset=utf-8\r\nContent-Transfer-Encoding: base64\r\n\r\nw5XDhMOWw5", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(Array.prototype.map.call(mail.text, function(chr){return chr.charCodeAt(0);}).join(","), "213,196,214,65533"); + test.done(); + }); + }, + "gb2312 mime words": function(test){ + var encodedText = "From: =?gb2312?B?086yyZjl?= user@ldkf.com.tw\r\n\r\nBody", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.deepEqual(mail.from, [{address: 'user@ldkf.com.tw', name: '游采樺'}]); + test.done(); + }); + } +}; + +exports["Multipart content"] = { + "Simple": function(test){ + var encodedText = "Content-type: multipart/mixed; boundary=ABC\r\n\r\n--ABC\r\nContent-type: text/plain; charset=utf-8\r\n\r\nÕÄÖÜ\r\n--ABC--", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.text, "ÕÄÖÜ"); + test.done(); + }); + }, + "Nested": function(test){ + var encodedText = "Content-type: 
multipart/mixed; boundary=ABC\r\n"+ + "\r\n"+ + "--ABC\r\n"+ + "Content-type: multipart/related; boundary=DEF\r\n"+ + "\r\n"+ + "--DEF\r\n"+ + "Content-type: text/plain; charset=utf-8\r\n"+ + "\r\n"+ + "ÕÄÖÜ\r\n"+ + "--DEF--\r\n"+ + "--ABC--", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.text, "ÕÄÖÜ"); + test.done(); + }); + }, + "Inline text (Sparrow)": function(test){ + var encodedText = "Content-type: multipart/mixed; boundary=ABC\r\n"+ + "\r\n"+ + "--ABC\r\n"+ + "Content-Type: text/plain; charset=\"utf-8\"\r\n"+ + "Content-Transfer-Encoding: 8bit\r\n"+ + "Content-Disposition: inline\r\n"+ + "\r\n"+ + "ÕÄÖÜ\r\n"+ + "--ABC--", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.text, "ÕÄÖÜ"); + test.done(); + }); + }, + "Different Levels": function(test){ + var encodedText = "Content-type: multipart/mixed; boundary=ABC\r\n"+ + "\r\n"+ + "--ABC\r\n"+ + "Content-type: text/html; charset=utf-8\r\n"+ + "\r\n"+ + "ÕÄÖÜ2\r\n"+ + "--ABC\r\n"+ + "Content-type: multipart/related; boundary=DEF\r\n"+ + "\r\n"+ + "--DEF\r\n"+ + "Content-type: text/plain; charset=utf-8\r\n"+ + "\r\n"+ + "ÕÄÖÜ1\r\n"+ + "--DEF--\r\n"+ + "--ABC--", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + + mailparser.on("end", function(mail){ + test.equal(mail.text, "ÕÄÖÜ1"); + test.equal(mail.html, "ÕÄÖÜ2"); + test.done(); + }); + } +}; + +exports["Attachment info"] = { + "Included integrity": function(test){ + var encodedText = "Content-type: multipart/mixed; boundary=ABC\r\n"+ + "\r\n"+ + "--ABC\r\n"+ + "Content-Type: application/octet-stream\r\n"+ + "Content-Transfer-Encoding: quoted-printable\r\n"+ + "Content-Disposition: attachment\r\n"+ + "\r\n"+ + "=00=01=02=03=04=05=06\r\n"+ + "--ABC--", + expectedHash = 
"9aa461e1eca4086f9230aa49c90b0c61", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + + for(var i=0, len = mail.length; itest 1

\r\n"+ + "--ABC\r\n"+ + "Content-Type: application/octet-stream\r\n"+ + "Content-Transfer-Encoding: base64\r\n"+ + "Content-Disposition: attachment; filename=\"test.pdf\"\r\n"+ + "\r\n"+ + "AAECAwQFBg==\r\n"+ + "--ABC\r\n"+ + "Content-Type: text/html\r\n"+ + "\r\n"+ + "

test 2

\r\n"+ + "--ABC--", + mail = new Buffer(encodedText, "utf-8"); + + test.expect(1); + var mailparser = new MailParser({showAttachmentLinks: true}); + + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.html, '

test 1


\n\n
\n

test 2

'); + test.done(); + }); + } +}; + +exports["Advanced nested HTML"] = function(test){ + var mail = fs.readFileSync(__dirname + "/nested.eml"); + + test.expect(2); + var mailparser = new MailParser(); + + for(var i=0, len = mail.length; iDear Sir

\n

Good evening.

\n

The footer

\n"); + test.done(); + }); +}; + +exports["MBOX format"] = { + "Not a mbox": function(test){ + var encodedText = "Content-Type: text/plain; charset=utf-8\r\n" + + "\r\n" + + "ÕÄ\r\n" + + "ÖÜ", // \r\nÕÄÖÜ + mail = new Buffer(encodedText, "utf-8"); + + test.expect(1); + var mailparser = new MailParser(); + + for(var i=0, len = mail.length; iFrom '": function(test){ + var encodedText = "Content-Type: text/plain; charset=utf-8\r\n" + + "\r\n" + + ">From test\r\n" + + ">>From pest", // \r\nÕÄÖÜ + mail = new Buffer(encodedText, "utf-8"); + + test.expect(1); + var mailparser = new MailParser(); + + for(var i=0, len = mail.length; iFrom test\n>>From pest"); + test.done(); + }); + }, + + "Unescape '>From '": function(test){ + var encodedText = "From MAILER-DAEMON Fri Jul 8 12:08:34 2011\r\n"+ + "Content-Type: text/plain; charset=utf-8\r\n" + + "\r\n" + + ">From test\r\n" + + ">>From pest", // \r\nÕÄÖÜ + mail = new Buffer(encodedText, "utf-8"); + + test.expect(1); + var mailparser = new MailParser(); + + for(var i=0, len = mail.length; iFrom pest"); + test.done(); + }); + } +}; diff --git a/test/nested.eml b/test/nested.eml new file mode 100644 index 0000000..52323a0 --- /dev/null +++ b/test/nested.eml @@ -0,0 +1,63 @@ +From: u@example.com +Content-Type: multipart/mixed; + boundary="----=_NextPart_000_0D48_01CE140D.19527DD0" + + +------=_NextPart_000_0D48_01CE140D.19527DD0 +Content-Type: multipart/related; + boundary="----=_NextPart_001_0D49_01CE140D.19527DD0" + + +------=_NextPart_001_0D49_01CE140D.19527DD0 +Content-Type: multipart/alternative; + boundary="----=_NextPart_002_0D4A_01CE140D.19527DD0" + + +------=_NextPart_002_0D4A_01CE140D.19527DD0 +Content-Transfer-Encoding: quoted-printable +Content-Type: text/plain; + charset="utf-8" + + +Dear Sir, + +Good evening. + + + + + +------=_NextPart_002_0D4A_01CE140D.19527DD0 +Content-Transfer-Encoding: quoted-printable +Content-Type: text/html; + charset="utf-8" + +

Dear Sir

+

Good evening.

+

+------=_NextPart_002_0D4A_01CE140D.19527DD0-- + +------=_NextPart_000_0D48_01CE140D.19527DD0 +Content-Type: multipart/alternative; boundary="===============1276485360==" +MIME-Version: 1.0 +Content-Disposition: inline + +--===============1276485360== +Content-Type: text/plain; charset="utf-8" +MIME-Version: 1.0 +Content-Transfer-Encoding: quoted-printable + + + +The footer + +--===============1276485360== +Content-Type: text/html; charset="utf-8" +MIME-Version: 1.0 +Content-Transfer-Encoding: quoted-printable + +

The footer

+ +--===============1276485360==-- +------=_NextPart_000_0D48_01CE140D.19527DD0-- + diff --git a/test/windowsfail.js b/test/windowsfail.js new file mode 100644 index 0000000..15ecad0 --- /dev/null +++ b/test/windowsfail.js @@ -0,0 +1,22 @@ +var MailParser = require("../lib/mailparser").MailParser, + testCase = require('nodeunit').testCase, + utillib = require("util"), + encodinglib = require("encoding"); + +// This test fails in windows as iconv-lite does not support CP949 +exports["ks_c_5601-1987"] = function(test){ + var encodedText = "Subject: =?ks_c_5601-1987?B?vcU=?=\r\n"+ + "Content-Type: text/plain; charset=ks_c_5601-1987\r\n"+ + "Content-Transfer-Encoding: base64\r\n"+ + "\r\n"+ + "vcU=", + mail = new Buffer(encodedText, "utf-8"); + + var mailparser = new MailParser(); + mailparser.end(mail); + mailparser.on("end", function(mail){ + test.equal(mail.subject, "신"); + test.equal(mail.text.trim(), "신"); + test.done(); + }); +}; \ No newline at end of file