diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..22e38a0
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,7 @@
+language: node_js
+node_js:
+ - '0.10'
+ - '0.12'
+ - '4'
+ - '5'
+ - '6'
diff --git a/README.md b/README.md
index 8e50175..833a281 100755
--- a/README.md
+++ b/README.md
@@ -2,6 +2,23 @@
Node.js port of Github's EmailReplyParser, a small library to parse plain text email content.
+## Usage
+
+``` js
+var EmailReplyParser = require('emailreplyparser')
+
+// To parse the reply from an email body
+var parsed = EmailReplyParser.parse(emailBody)
+
+// To parse the reply from an email body, preserving signatures
+var parsed = EmailReplyParser.parse(emailBody, true)
+
+// Reads in an email and produces an Email object; its fragments array
+// holds one Fragment for each part of the email.
+var fragments = EmailReplyParser.read(emailBody).fragments
+```
+
+For examples, refer to the tests.
## Known Issues
(Taken from Github's version)
@@ -65,6 +82,24 @@ Apparently, prefixing lines with `>` isn't universal either:
To: Rick
-### To run the tests
-* Install nodeunit `npm install nodeunit`
-* Run the tests: `nodeunit test/email_reply_parser_test.js`
\ No newline at end of file
+## To run the tests
+
+* Install dependencies: `npm install`
+* Run the tests: `npm test`
+
+## Upgrading to v1.0
+
+- `EmailReplyParser` is now exported directly. If you are upgrading from a pre-1.0 release, change the following:
+
+``` js
+var EmailReplyParser = require('emailreplyparser').EmailReplyParser
+```
+
+to:
+
+``` js
+var EmailReplyParser = require('emailreplyparser')
+```
+
+- The `parse_reply` function is now called `parse`.
+- The module no longer adds any methods to the `String` prototype. If your code was relying on the `trim`, `ltrim`, `rtrim`, `gsub`, `reverse` or `chomp` methods to be available on the prototype, you'll need to make changes.
diff --git a/lib/emailreplyparser.js b/lib/emailreplyparser.js
index 445a831..2a17eb2 100644
--- a/lib/emailreplyparser.js
+++ b/lib/emailreplyparser.js
@@ -25,77 +25,36 @@
//
// EmailReplyParser also attempts to figure out which of these blocks should
// be hidden from users.
-var EmailReplyParser = {
- VERSION: "0.4",
- // Public: Splits an email body into a list of Fragments.
- //
- // text - A String email body.
- //
- // Returns an Email instance.
- read: function(text) {
- var email = new Email();
- return email.read(text);
- },
+'use strict';
- // Public: Get the text of the visible portions of the given email body.
- //
- // text - A String email body.
- // [optional, default: false] include_signatures - Whether or not to include signatures in reply
- //
- // Returns a String.
- parse_reply: function (text, include_signatures) {
- if(typeof(include_signatures)==='undefined') include_signatures = false;
- return this.read(text).visible_text(include_signatures);
- }
-};
+/* jshint eqnull: true */
-String.prototype.trim = function() {
- return this.replace(/^\s*|\s*$/g, "");
-}
+// String manipulation utilities
+var trim = function(str) {
+ return str.replace(/^\s*|\s*$/g, '');
+};
-String.prototype.ltrim = function() {
- return this.replace(/^\s*/g, "");
-}
+var ltrim = function(str) {
+ return str.replace(/^\s*/g, '');
+};
-String.prototype.rtrim = function() {
- return this.replace(/\s*$/g, "");
-}
+var rtrim = function(str) {
+ return str.replace(/\s*$/g, '');
+};
-String.prototype.reverse = function() {
- var s = "";
- var i = this.length;
- while (i>0) {
- s += this.substring(i-1,i);
+var reverse = function(str) {
+ var s = '';
+ var i = str.length;
+ while (i > 0) {
+ s += str.substring(i-1, i);
i--;
}
return s;
-}
-
-//http://flochip.com/2011/09/06/rubys-string-gsub-in-javascript/
-String.prototype.gsub = function(source, pattern, replacement) {
- var match, result;
- if (!((pattern != null) && (replacement != null))) {
- return source;
- }
- result = '';
- while (source.length > 0) {
- if ((match = source.match(pattern))) {
- result += source.slice(0, match.index);
- result += replacement;
- source = source.slice(match.index + match[0].length);
- }
- else {
- result += source;
- source = '';
- }
- }
- return result;
};
-//http://3dmdesign.com/development/extending-javascript-strings-with-chomp-using-prototypes
-String.prototype.chomp = function() {
- return this.replace(/(\n|\r)+$/, '');
+var chomp = function(str) {
+ return str.replace(/(\n|\r)+$/, '');
};
// An Email instance represents a parsed body String.
@@ -117,8 +76,6 @@ Email.prototype = {
//
// Returns a String.
visible_text: function(include_signatures) {
- if(typeof(include_signatures)==='undefined') include_signatures = false;
-
var visible_text = [];
for (var key in this.fragments) {
if (!this.fragments[key].hidden || (include_signatures && this.fragments[key].signature)) {
@@ -126,7 +83,7 @@ Email.prototype = {
}
}
- return visible_text.join("\n").rtrim();
+ return rtrim(visible_text.join('\n'));
},
// Splits the given text into a list of Fragments. This is roughly done by
@@ -140,6 +97,9 @@ Email.prototype = {
// in 1.9 we want to operate on the raw bytes
// text = text.dup.force_encoding('binary') if text.respond_to?(:force_encoding)
+ // Normalize line endings: convert every CRLF pair to LF (a string pattern would only replace the first one).
+ text = text.replace(/\r\n/g, '\n');
+
// Check for multi-line reply headers. Some clients break up
// the "On DATE, NAME wrote:" line into multiple lines.
var patt = /^(On\s(\n|.)*wrote:)$/m;
@@ -153,11 +113,11 @@ Email.prototype = {
// The text is reversed initially due to the way we check for hidden
// fragments.
- text = text.reverse();
+ text = reverse(text);
// This determines if any 'visible' Fragment has been found. Once any
// visible Fragment is found, stop looking for hidden ones.
- this.found_visible = false
+ this.found_visible = false;
// This instance variable points to the current Fragment. If the matched
// line fits, it should be added to this Fragment. Otherwise, finish it
@@ -191,16 +151,16 @@ Email.prototype = {
//
// Returns nothing.
scan_line: function(line) {
- var SIG_REGEX = '(--|__|\\w-$)|(^(\\w+\\s*){1,3} ' + ("Sent from my").reverse() + '$)';
+ var SIG_REGEX = '(--|__|\\w-$)|(^(\\w+\\s*){1,3} ' + reverse('Sent from my') + '$)';
- line = line.chomp('\n');
+ line = chomp(line);
if (!(new RegExp(SIG_REGEX)).test(line)) {
- line = line.ltrim();
+ line = ltrim(line);
}
// Mark the current Fragment as a signature if the current line is ''
// and the Fragment starts with a common signature indicator.
- if (this.fragment != null && line == '') {
+ if (this.fragment != null && line === '') {
if ((new RegExp(SIG_REGEX)).test(this.fragment.lines[this.fragment.lines.length - 1])) {
this.fragment.signature = true;
this.finish_fragment();
@@ -214,7 +174,7 @@ Email.prototype = {
// If the line matches the current fragment, add it. Note that a common
// reply header also counts as part of the quoted Fragment, even though
// it doesn't start with `>`.
- if (this.fragment != null && ((this.fragment.quoted == is_quoted) || (this.fragment.quoted && (this.quote_header(line) || line == '')))) {
+ if (this.fragment != null && ((this.fragment.quoted === is_quoted) || (this.fragment.quoted && (this.quote_header(line) || line === '')))) {
this.fragment.lines.push(line);
}
// Otherwise, finish the fragment and start a new one.
@@ -257,30 +217,32 @@ Email.prototype = {
// Player 2 (signature, hidden)
//
finish_fragment: function() {
- if (this.fragment != null) {
+ if(this.fragment != null) {
this.fragment.finish();
if (!this.found_visible) {
- if (this.fragment.quoted || this.fragment.signature || this.fragment.to_s().trim() == '')
+ if (this.fragment.quoted || this.fragment.signature || trim(this.fragment.to_s()) === '') {
this.fragment.hidden = true;
- else
+ }
+ else {
this.found_visible = true;
+ }
}
this.fragments.push(this.fragment);
this.fragment = null;
}
}
-}
+};
// Fragments
// Represents a group of paragraphs in the email sharing common attributes.
// Paragraphs should get their own fragment if they are a quoted area or a
// signature.
-var Fragment = function(quoted, first_line) {
- this.initialize(quoted, first_line)
-};
+function Fragment(quoted, first_line) {
+ this.initialize(quoted, first_line);
+}
Fragment.prototype = {
// This is an Array of String lines of content. Since the content is
@@ -297,16 +259,15 @@ Fragment.prototype = {
this.quoted = quoted;
this.lines = [first_line];
this.content = null;
- this.lines = this.lines.filter(function(){return true});
+ this.lines = this.lines.filter(function() { return true; });
},
// Builds the string content by joining the lines and reversing them.
//
// Returns nothing.
finish: function() {
- this.content = this.lines.join("\n");
+ this.content = reverse(this.lines.join('\n'));
this.lines = [];
- this.content = this.content.reverse();
},
to_s: function() {
@@ -314,6 +275,30 @@ Fragment.prototype = {
}
};
-module.exports.EmailReplyParser = EmailReplyParser;
+var EmailReplyParser = {
+ VERSION: '1.0',
+
+ // Public: Splits an email body into a list of Fragments.
+ //
+ // text - A String email body.
+ //
+ // Returns an Email instance.
+ read: function(text) {
+ var email = new Email();
+ return email.read(text);
+ },
+
+ // Public: Get the text of the visible portions of the given email body.
+ //
+ // text - A String email body.
+ // [optional, default: undefined] include_signatures - Whether or not to include signatures in reply
+ //
+ // Returns a String.
+ parse: function (text, include_signatures) {
+ return this.read(text).visible_text(include_signatures);
+ }
+};
+
+module.exports = EmailReplyParser;
-//console.log(EmailReplyParser.read("I get proper rendering as well.\n\nSent from a magnificent torch of pixels\n\nOn Dec 16, 2011, at 12:47 PM, Corey Donohoe\n\nwrote:\n\n> Was this caching related or fixed already? I get proper rendering here.\n>\n> \n>\n> ---\n> Reply to this email directly or view it on GitHub:\n> https://github.com/github/github/issues/2278#issuecomment-3182418\n"));
\ No newline at end of file
+//console.log(EmailReplyParser.read("I get proper rendering as well.\n\nSent from a magnificent torch of pixels\n\nOn Dec 16, 2011, at 12:47 PM, Corey Donohoe\n\nwrote:\n\n> Was this caching related or fixed already? I get proper rendering here.\n>\n> \n>\n> ---\n> Reply to this email directly or view it on GitHub:\n> https://github.com/github/github/issues/2278#issuecomment-3182418\n"));
diff --git a/package.json b/package.json
index 4c7c9b0..9983d71 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "emailreplyparser",
- "version": "0.0.5",
+ "version": "1.0.0",
"description": "Node.js Port of GitHub's email_reply_parser.rb",
"author": "Michael Owens (http://mowens.com/)",
"repository": {
@@ -8,6 +8,20 @@
"url": "https://github.com/mowens/emailreplyparser.git"
},
"main": "./lib/emailreplyparser",
- "engines": { "node": ">= 0.4.0" },
- "keywords": ["email", "parser", "emailreplyparser", "email_reply_parser"]
+ "engines": {
+ "node": ">= 0.4.0"
+ },
+ "scripts": {
+ "test": "nodeunit test/email_reply_parser_test.js"
+ },
+ "keywords": [
+ "email",
+ "parser",
+ "emailreplyparser",
+ "email_reply_parser"
+ ],
+ "devDependencies": {
+ "lodash": "^4.13.1",
+ "nodeunit": "^0.9.1"
+ }
}
diff --git a/test/email_reply_parser_test.js b/test/email_reply_parser_test.js
index 3165f3b..1c43231 100755
--- a/test/email_reply_parser_test.js
+++ b/test/email_reply_parser_test.js
@@ -1,12 +1,12 @@
-var fs = require('fs');
+'use strict';
-var _ = require('underscore');
+var fs = require('fs');
+var _ = require('lodash');
-var EmailReplyParser = require('../lib/emailreplyparser').EmailReplyParser;
+var EmailReplyParser = require('../lib/emailreplyparser');
function get_email(name) {
var data = fs.readFileSync(__dirname + '/emails/' + name + '.txt', 'ascii');
-
return EmailReplyParser.read(data);
}
@@ -15,21 +15,21 @@ function get_raw_email(name) {
}
exports.test_reads_simple_body = function(test){
- reply = get_email('email_1_1');
+ var reply = get_email('email_1_1');
test.equal(3, reply.fragments.length);
test.deepEqual([false, false, false], _.map(reply.fragments, function(f) { return f.quoted; }));
test.deepEqual([false, true, true], _.map(reply.fragments, function(f) { return f.signature; }));
test.deepEqual([false, true, true], _.map(reply.fragments, function(f) { return f.hidden; }));
- test.equal("Hi folks\n\nWhat is the best way to clear a Riak bucket of all key, values after\nrunning a test?\nI am currently using the Java HTTP API.\n", reply.fragments[0].to_s());
+ test.equal('Hi folks\n\nWhat is the best way to clear a Riak bucket of all key, values after\nrunning a test?\nI am currently using the Java HTTP API.\n', reply.fragments[0].to_s());
- test.equal("-Abhishek Kona\n\n", reply.fragments[1].to_s());
+ test.equal('-Abhishek Kona\n\n', reply.fragments[1].to_s());
test.done();
-}
+};
exports.test_reads_top_post = function(test){
- reply = get_email('email_1_3');
+ var reply = get_email('email_1_3');
test.equal(5, reply.fragments.length);
test.deepEqual([false, false, true, false, false], _.map(reply.fragments, function(f) { return f.quoted; }));
@@ -41,27 +41,27 @@ exports.test_reads_top_post = function(test){
test.ok((/^On [^\:]+\:/m).test(reply.fragments[2].to_s()));
test.ok((new RegExp('^_')).test(reply.fragments[4].to_s()));
test.done();
-}
+};
exports.test_reads_bottom_post = function(test){
- reply = get_email('email_1_2');
+ var reply = get_email('email_1_2');
test.equal(6, reply.fragments.length);
test.deepEqual([false, true, false, true, false, false], _.map(reply.fragments, function(f) { return f.quoted; }));
test.deepEqual([false, false, false, false, false, true], _.map(reply.fragments, function(f) { return f.signature; }));
test.deepEqual([false, false, false, true, true, true], _.map(reply.fragments, function(f) { return f.hidden; }));
- test.equal("Hi,", reply.fragments[0].to_s());
+ test.equal('Hi,', reply.fragments[0].to_s());
test.ok((new RegExp('^On [^\:]+\:')).test(reply.fragments[1].to_s()));
test.ok((/^You can list/m).test(reply.fragments[2].to_s()));
test.ok((/^> /m).test(reply.fragments[3].to_s()));
test.ok((new RegExp('^_')).test(reply.fragments[5].to_s()));
test.done();
-}
+};
exports.test_reads_inline_replies = function(test){
- reply = get_email('email_1_8');
+ var reply = get_email('email_1_8');
test.equal(7, reply.fragments.length);
test.deepEqual([true, false, true, false, true, false, false], _.map(reply.fragments, function(f) { return f.quoted; }));
@@ -76,26 +76,26 @@ exports.test_reads_inline_replies = function(test){
test.equal('', reply.fragments[5].to_s().trim());
test.ok((new RegExp('^-')).test(reply.fragments[6].to_s()));
test.done();
-}
+};
exports.test_recognizes_date_string_above_quote = function(test){
- reply = get_email('email_1_4');
+ var reply = get_email('email_1_4');
test.ok((/^Awesome/).test(reply.fragments[0].to_s()));
test.ok((/^On/m).test(reply.fragments[1].to_s()));
test.ok((/Loader/m).test(reply.fragments[1].to_s()));
test.done();
-}
+};
exports.test_a_complex_body_with_only_one_fragment = function(test){
- reply = get_email('email_1_5');
+ var reply = get_email('email_1_5');
test.equal(1, reply.fragments.length);
test.done();
-}
+};
exports.test_reads_email_with_correct_signature = function(test){
- reply = get_email('correct_sig');
+ var reply = get_email('correct_sig');
test.equal(2, reply.fragments.length);
@@ -105,81 +105,82 @@ exports.test_reads_email_with_correct_signature = function(test){
test.ok((new RegExp('^-- \nrick')).test(reply.fragments[1].to_s()));
test.done();
-}
+};
exports.test_deals_with_multiline_reply_headers = function(test){
- reply = get_email('email_1_6');
+ var reply = get_email('email_1_6');
test.ok((new RegExp('^I get')).test(reply.fragments[0].to_s()));
test.ok((/^On/m).test(reply.fragments[1].to_s()));
test.ok((new RegExp('Was this')).test(reply.fragments[1].to_s()));
test.done();
-}
+};
exports.test_does_not_modify_input_string = function(test){
- original = "The Quick Brown Fox Jumps Over The Lazy Dog";
+ var original = 'The Quick Brown Fox Jumps Over The Lazy Dog';
EmailReplyParser.read(original);
- test.equal("The Quick Brown Fox Jumps Over The Lazy Dog", original);
+ test.equal('The Quick Brown Fox Jumps Over The Lazy Dog', original);
test.done();
-}
+};
exports.test_returns_only_the_visible_fragments_as_a_string = function(test){
- reply = get_email('email_2_1');
+ var reply = get_email('email_2_1');
String.prototype.rtrim = function() {
- return this.replace(/\s*$/g, "");
- }
+ return this.replace(/\s*$/g, '');
+ };
- var fragments = _.select(reply.fragments, function(f) { return !f.hidden; });
- var fragments = _.map(fragments, function(f) { return f.to_s(); });
- test.equal(fragments.join("\n").rtrim(), reply.visible_text());
+ var fragments = _(reply.fragments)
+ .filter(function(f) { return !f.hidden; })
+ .map(function(f) { return f.to_s(); });
+ test.equal(fragments.join('\n').rtrim(), reply.visible_text());
test.done();
-}
+};
exports.test_parse_out_just_top_for_outlook_reply = function(test){
- body = get_raw_email('email_2_1');
- test.equal("Outlook with a reply", EmailReplyParser.parse_reply(body));
+ var body = get_raw_email('email_2_1');
+ test.equal('Outlook with a reply', EmailReplyParser.parse(body));
test.done();
-}
+};
exports.test_parse_out_sent_from_iPhone = function(test){
- body = get_raw_email('email_iPhone');
- test.equal("Here is another email", EmailReplyParser.parse_reply(body));
+ var body = get_raw_email('email_iPhone');
+ test.equal('Here is another email', EmailReplyParser.parse(body));
test.done();
-}
+};
exports.test_parse_out_sent_from_BlackBerry = function(test){
- body = get_raw_email('email_BlackBerry');
- test.equal("Here is another email", EmailReplyParser.parse_reply(body));
+ var body = get_raw_email('email_BlackBerry');
+ test.equal('Here is another email', EmailReplyParser.parse(body));
test.done();
-}
+};
exports.test_parse_out_send_from_multiword_mobile_device = function(test){
- body = get_raw_email('email_multi_word_sent_from_my_mobile_device');
- test.equal("Here is another email", EmailReplyParser.parse_reply(body));
+ var body = get_raw_email('email_multi_word_sent_from_my_mobile_device');
+ test.equal('Here is another email', EmailReplyParser.parse(body));
test.done();
-}
+};
exports.test_do_not_parse_out_send_from_in_regular_sentence = function(test){
- body = get_raw_email('email_sent_from_my_not_signature');
- test.equal("Here is another email\n\nSent from my desk, is much easier then my mobile phone.", EmailReplyParser.parse_reply(body));
+ var body = get_raw_email('email_sent_from_my_not_signature');
+ test.equal('Here is another email\n\nSent from my desk, is much easier then my mobile phone.', EmailReplyParser.parse(body));
test.done();
-}
+};
exports.test_retains_bullets = function(test){
- body = get_raw_email('email_bullets');
- test.equal("test 2 this should list second\n\nand have spaces\n\nand retain this formatting\n\n\n - how about bullets\n - and another", EmailReplyParser.parse_reply(body));
+ var body = get_raw_email('email_bullets');
+ test.equal('test 2 this should list second\n\nand have spaces\n\nand retain this formatting\n\n\n - how about bullets\n - and another', EmailReplyParser.parse(body));
test.done();
-}
+};
-exports.test_parse_reply = function(test){
- body = get_raw_email('email_1_2');
- test.equal(EmailReplyParser.read(body).visible_text(), EmailReplyParser.parse_reply(body));
+exports.test_parse = function(test){
+ var body = get_raw_email('email_1_2');
+ test.equal(EmailReplyParser.read(body).visible_text(), EmailReplyParser.parse(body));
test.done();
-}
+};
exports.test_correctly_reads_top_post_when_line_starts_with_On = function(test){
- reply = get_email('email_1_7');
+ var reply = get_email('email_1_7');
test.equal(5, reply.fragments.length);
test.deepEqual([false, false, true, false, false], _.map(reply.fragments, function(f) { return f.quoted; }));
@@ -191,4 +192,4 @@ exports.test_correctly_reads_top_post_when_line_starts_with_On = function(test){
test.ok((/^On [^\:]+\:/m).test(reply.fragments[2].to_s()));
test.ok((new RegExp('^_')).test(reply.fragments[4].to_s()));
test.done();
-}
+};