diff --git a/lib/emailreplyparser.js b/lib/emailreplyparser.js index 445a831..1c50665 100644 --- a/lib/emailreplyparser.js +++ b/lib/emailreplyparser.js @@ -142,15 +142,36 @@ Email.prototype = { // Check for multi-line reply headers. Some clients break up // the "On DATE, NAME wrote:" line into multiple lines. - var patt = /^(On\s(\n|.)*wrote:)$/m; - var doubleOnPatt = /^(On\s(\n|.)*(^(> )?On\s)((\n|.)*)wrote:)$/m; + var englishPatt = /^(On\s(\n|.)*wrote:)$/mi; + var englishDoubleOnPatt = /^(On\s(\n|.)*(^(> )?On\s)((\n|.)*)wrote:)$/mi; - if(patt.test(text) && !doubleOnPatt.test(text)) { - var reply_header = (patt.exec(text))[0]; - // Remove all new lines from the reply header. + if(englishPatt.test(text) && !englishDoubleOnPatt.test(text)) { + var reply_header = (englishPatt.exec(text))[0]; text = text.replace(reply_header, reply_header.replace(/\n/g, ' ')); } + var frenchPatt = /^(Le\s(\n|.)*a écrit:)$/mi; + var frenchDoubleOnPatt = /^(Le\s(\n|.)*(\s)((\n|.)*)a écrit :)$/mi; + + if(frenchPatt.test(text) && !frenchDoubleOnPatt.test(text)) { + var reply_header = (frenchPatt.exec(text))[0]; + text = text.replace(reply_header, ''); + } else if (frenchDoubleOnPatt.test(text)) { + var reply_header = (frenchDoubleOnPatt.exec(text))[0]; + text = text.replace(reply_header, ''); + } + + // Check gmail pattern + // DATE NAME : + var GmailPatt = /^(\n.*?\s.*?\<[\n]?.*?\>\:)$/m; + if(GmailPatt.test(text)) { + var reply_header = (GmailPatt.exec(text))[0]; + text = text.replace(reply_header, ' '); + } + + text = this.remove_headers(text); + + // The text is reversed initially due to the way we check for hidden // fragments. text = text.reverse(); @@ -234,6 +255,15 @@ Email.prototype = { return (new RegExp('^:etorw.*nO$')).test(line); }, + remove_headers: function(text) { + var pattern = /^From:\s.*$/mi; + if(pattern.test(text)) { + var reply_header = (pattern.exec(text))[0]; + text = text.substring(0, text.indexOf(reply_header)); + } + return text; + }, + // Builds the fragment string and reverses it, after all lines have been // added. It also checks to see if this Fragment is hidden. The hidden // Fragment check reads from the bottom to the top. @@ -316,4 +346,4 @@ Fragment.prototype = { module.exports.EmailReplyParser = EmailReplyParser; -//console.log(EmailReplyParser.read("I get proper rendering as well.\n\nSent from a magnificent torch of pixels\n\nOn Dec 16, 2011, at 12:47 PM, Corey Donohoe\n\nwrote:\n\n> Was this caching related or fixed already? I get proper rendering here.\n>\n> ![](https://img.skitch.com/20111216-m9munqjsy112yqap5cjee5wr6c.jpg)\n>\n> ---\n> Reply to this email directly or view it on GitHub:\n> https://github.com/github/github/issues/2278#issuecomment-3182418\n")); \ No newline at end of file +//console.log(EmailReplyParser.read("I get proper rendering as well.\n\nSent from a magnificent torch of pixels\n\nOn Dec 16, 2011, at 12:47 PM, Corey Donohoe\n\nwrote:\n\n> Was this caching related or fixed already? I get proper rendering here.\n>\n> ![](https://img.skitch.com/20111216-m9munqjsy112yqap5cjee5wr6c.jpg)\n>\n> ---\n> Reply to this email directly or view it on GitHub:\n> https://github.com/github/github/issues/2278#issuecomment-3182418\n")); diff --git a/package.json b/package.json index 4c7c9b0..f1cdd3c 100644 --- a/package.json +++ b/package.json @@ -8,6 +8,16 @@ "url": "https://github.com/mowens/emailreplyparser.git" }, "main": "./lib/emailreplyparser", - "engines": { "node": ">= 0.4.0" }, - "keywords": ["email", "parser", "emailreplyparser", "email_reply_parser"] + "engines": { + "node": ">= 0.4.0" + }, + "keywords": [ + "email", + "parser", + "emailreplyparser", + "email_reply_parser" + ], + "devDependencies": { + "underscore": "^1.8.3" + } } diff --git a/test/email_reply_parser_test.js b/test/email_reply_parser_test.js index 3165f3b..b211ace 100755 --- a/test/email_reply_parser_test.js +++ b/test/email_reply_parser_test.js @@ -5,13 +5,13 @@ var _ = require('underscore'); var EmailReplyParser = require('../lib/emailreplyparser').EmailReplyParser; function get_email(name) { - var data = fs.readFileSync(__dirname + '/emails/' + name + '.txt', 'ascii'); + var data = fs.readFileSync(__dirname + '/emails/' + name + '.txt', 'utf-8'); return EmailReplyParser.read(data); } function get_raw_email(name) { - return fs.readFileSync(__dirname + '/emails/' + name + '.txt', 'ascii'); + return fs.readFileSync(__dirname + '/emails/' + name + '.txt', 'utf-8'); } exports.test_reads_simple_body = function(test){ @@ -192,3 +192,22 @@ exports.test_correctly_reads_top_post_when_line_starts_with_On = function(test){ test.ok((new RegExp('^_')).test(reply.fragments[4].to_s())); test.done(); } + +exports.test_parse_out_send_from_multiword_gmail = function(test){ + body = get_raw_email('test_parse_out_send_from_multiword_gmail'); + test.equal("Awesome! I haven't had another problem with it.", EmailReplyParser.parse_reply(body)); + test.done(); +} + + +exports.test_parse_out_send_from_hotmail = function(test){ + body = get_raw_email('email_hotmail'); + test.equal("I replied", EmailReplyParser.parse_reply(body)); + test.done(); +} + +exports.test_parse_out_send_from_french = function(test){ + body = get_raw_email('email_french'); + test.equal("On a 60% de test sur toute l'Infra", EmailReplyParser.parse_reply(body)); + test.done(); +} diff --git a/test/emails/email_french.txt b/test/emails/email_french.txt new file mode 100644 index 0000000..3031924 --- /dev/null +++ b/test/emails/email_french.txt @@ -0,0 +1,4 @@ +On a 60% de test sur toute l'Infra + +Le 14 oct. 2016 7:54 PM, "www.acme.com" < +session_7d0qe51e-4c4-4q24-c90f-8cbd5bf0d93f+in@mail.test.im> a écrit : diff --git a/test/emails/email_hotmail.txt b/test/emails/email_hotmail.txt new file mode 100644 index 0000000..64b9b0e --- /dev/null +++ b/test/emails/email_hotmail.txt @@ -0,0 +1,8 @@ +I replied + +From: examples@email.goalengine.com +To: reply@reply.github.com +Subject: GitHub | You received a reply +Date: Sat, 30 Apr 2016 17:00:42 +0000 + +Message \ No newline at end of file diff --git a/test/emails/test_parse_out_send_from_multiword_gmail.txt b/test/emails/test_parse_out_send_from_multiword_gmail.txt new file mode 100644 index 0000000..93b8b76 --- /dev/null +++ b/test/emails/test_parse_out_send_from_multiword_gmail.txt @@ -0,0 +1,5 @@ +Awesome! I haven't had another problem with it. + +2016-04-30 14:17 GMT+02:00 defunkt: + +> Loader seems to be working well.