From b69d5bbf15dc6ff7f67d5ec630bb296477799c9c Mon Sep 17 00:00:00 2001 From: Baptiste JAMIN Date: Sat, 30 Apr 2016 16:02:09 +0200 Subject: [PATCH 1/4] fix gmail replies --- lib/emailreplyparser.js | 8 ++++++++ test/email_reply_parser_test.js | 6 ++++++ test/emails/test_parse_out_send_from_multiword_gmail.txt | 5 +++++ 3 files changed, 19 insertions(+) create mode 100644 test/emails/test_parse_out_send_from_multiword_gmail.txt diff --git a/lib/emailreplyparser.js b/lib/emailreplyparser.js index 445a831..995a79c 100644 --- a/lib/emailreplyparser.js +++ b/lib/emailreplyparser.js @@ -151,6 +151,14 @@ Email.prototype = { text = text.replace(reply_header, reply_header.replace(/\n/g, ' ')); } + // Check gmail pattern + // DATE NAME : + var GmailPatt = /^(\n.*?\s.*?\<[\n]?.*?\>\:)$/m; + if(GmailPatt.test(text)) { + var reply_header = (GmailPatt.exec(text))[0]; + text = text.replace(reply_header, ' '); + } + // The text is reversed initially due to the way we check for hidden // fragments. text = text.reverse(); diff --git a/test/email_reply_parser_test.js b/test/email_reply_parser_test.js index 3165f3b..3708d10 100755 --- a/test/email_reply_parser_test.js +++ b/test/email_reply_parser_test.js @@ -192,3 +192,9 @@ exports.test_correctly_reads_top_post_when_line_starts_with_On = function(test){ test.ok((new RegExp('^_')).test(reply.fragments[4].to_s())); test.done(); } + +exports.test_parse_out_send_from_multiword_gmail = function(test){ + body = get_raw_email('test_parse_out_send_from_multiword_gmail'); + test.equal("Awesome! I haven't had another problem with it.", EmailReplyParser.parse_reply(body)); + test.done(); +} diff --git a/test/emails/test_parse_out_send_from_multiword_gmail.txt b/test/emails/test_parse_out_send_from_multiword_gmail.txt new file mode 100644 index 0000000..93b8b76 --- /dev/null +++ b/test/emails/test_parse_out_send_from_multiword_gmail.txt @@ -0,0 +1,5 @@ +Awesome! I haven't had another problem with it. + +2016-04-30 14:17 GMT+02:00 defunkt: + +> Loader seems to be working well. From 2d3d20108e128cec8166c0477c7fda2c3c34fe92 Mon Sep 17 00:00:00 2001 From: Baptiste JAMIN Date: Sun, 1 May 2016 15:54:35 +0200 Subject: [PATCH 2/4] fix hotmail replies --- lib/emailreplyparser.js | 12 ++++++++++++ test/email_reply_parser_test.js | 8 ++++++++ test/emails/email_hotmail.txt | 8 ++++++++ 3 files changed, 28 insertions(+) create mode 100644 test/emails/email_hotmail.txt diff --git a/lib/emailreplyparser.js b/lib/emailreplyparser.js index 995a79c..9e48337 100644 --- a/lib/emailreplyparser.js +++ b/lib/emailreplyparser.js @@ -159,6 +159,9 @@ Email.prototype = { text = text.replace(reply_header, ' '); } + text = this.remove_headers(text); + + // The text is reversed initially due to the way we check for hidden // fragments. text = text.reverse(); @@ -242,6 +245,15 @@ Email.prototype = { return (new RegExp('^:etorw.*nO$')).test(line); }, + remove_headers: function(text) { + var pattern = /^From:\s.*$/mi; + if(pattern.test(text)) { + var reply_header = (pattern.exec(text))[0]; + text = text.substring(0, text.indexOf(reply_header)); + } + return text; + }, + // Builds the fragment string and reverses it, after all lines have been // added. It also checks to see if this Fragment is hidden. The hidden // Fragment check reads from the bottom to the top. diff --git a/test/email_reply_parser_test.js b/test/email_reply_parser_test.js index 3708d10..f4f338a 100755 --- a/test/email_reply_parser_test.js +++ b/test/email_reply_parser_test.js @@ -198,3 +198,11 @@ exports.test_parse_out_send_from_multiword_gmail = function(test){ test.equal("Awesome! I haven't had another problem with it.", EmailReplyParser.parse_reply(body)); test.done(); } + + +exports.test_parse_out_send_from_hotmail = function(test){ + body = get_raw_email('email_hotmail'); + test.equal("I replied", EmailReplyParser.parse_reply(body)); + test.done(); +} + diff --git a/test/emails/email_hotmail.txt b/test/emails/email_hotmail.txt new file mode 100644 index 0000000..64b9b0e --- /dev/null +++ b/test/emails/email_hotmail.txt @@ -0,0 +1,8 @@ +I replied + +From: examples@email.goalengine.com +To: reply@reply.github.com +Subject: GitHub | You received a reply +Date: Sat, 30 Apr 2016 17:00:42 +0000 + +Message \ No newline at end of file From f5c6618e38fa6b3400c32c2882ac3f89c4066748 Mon Sep 17 00:00:00 2001 From: Baptiste Jamin Date: Fri, 14 Oct 2016 21:25:55 +0200 Subject: [PATCH 3/4] fix french --- lib/emailreplyparser.js | 22 ++++++++++++++++------ package.json | 14 ++++++++++++-- test/email_reply_parser_test.js | 9 +++++++-- test/emails/email_french.txt | 3 +++ 4 files changed, 38 insertions(+), 10 deletions(-) create mode 100644 test/emails/email_french.txt diff --git a/lib/emailreplyparser.js b/lib/emailreplyparser.js index 9e48337..9fba941 100644 --- a/lib/emailreplyparser.js +++ b/lib/emailreplyparser.js @@ -142,15 +142,25 @@ Email.prototype = { // Check for multi-line reply headers. Some clients break up // the "On DATE, NAME wrote:" line into multiple lines. - var patt = /^(On\s(\n|.)*wrote:)$/m; - var doubleOnPatt = /^(On\s(\n|.)*(^(> )?On\s)((\n|.)*)wrote:)$/m; + var englishPatt = /^(On\s(\n|.)*wrote:)$/mi; + var englishDoubleOnPatt = /^(On\s(\n|.)*(^(> )?On\s)((\n|.)*)wrote:)$/mi; - if(patt.test(text) && !doubleOnPatt.test(text)) { - var reply_header = (patt.exec(text))[0]; - // Remove all new lines from the reply header. + if(englishPatt.test(text) && !englishDoubleOnPatt.test(text)) { + var reply_header = (englishPatt.exec(text))[0]; text = text.replace(reply_header, reply_header.replace(/\n/g, ' ')); } + var frenchPatt = /^(Le\s(\n|.)*a écrit:)$/mi; + var frenchDoubleOnPatt = /^(Le\s(\n|.)*(\s)((\n|.)*)a écrit:)$/mi; + + if(frenchPatt.test(text) && !frenchDoubleOnPatt.test(text)) { + var reply_header = (frenchPatt.exec(text))[0]; + text = text.replace(reply_header, ''); + } else if (frenchDoubleOnPatt.test(text)) { + var reply_header = (frenchDoubleOnPatt.exec(text))[0]; + text = text.replace(reply_header, ''); + } + // Check gmail pattern // DATE NAME : var GmailPatt = /^(\n.*?\s.*?\<[\n]?.*?\>\:)$/m; @@ -336,4 +346,4 @@ Fragment.prototype = { module.exports.EmailReplyParser = EmailReplyParser; -//console.log(EmailReplyParser.read("I get proper rendering as well.\n\nSent from a magnificent torch of pixels\n\nOn Dec 16, 2011, at 12:47 PM, Corey Donohoe\n\nwrote:\n\n> Was this caching related or fixed already? I get proper rendering here.\n>\n> ![](https://img.skitch.com/20111216-m9munqjsy112yqap5cjee5wr6c.jpg)\n>\n> ---\n> Reply to this email directly or view it on GitHub:\n> https://github.com/github/github/issues/2278#issuecomment-3182418\n")); \ No newline at end of file +//console.log(EmailReplyParser.read("I get proper rendering as well.\n\nSent from a magnificent torch of pixels\n\nOn Dec 16, 2011, at 12:47 PM, Corey Donohoe\n\nwrote:\n\n> Was this caching related or fixed already? I get proper rendering here.\n>\n> ![](https://img.skitch.com/20111216-m9munqjsy112yqap5cjee5wr6c.jpg)\n>\n> ---\n> Reply to this email directly or view it on GitHub:\n> https://github.com/github/github/issues/2278#issuecomment-3182418\n")); diff --git a/package.json b/package.json index 4c7c9b0..f1cdd3c 100644 --- a/package.json +++ b/package.json @@ -8,6 +8,16 @@ "url": "https://github.com/mowens/emailreplyparser.git" }, "main": "./lib/emailreplyparser", - "engines": { "node": ">= 0.4.0" }, - "keywords": ["email", "parser", "emailreplyparser", "email_reply_parser"] + "engines": { + "node": ">= 0.4.0" + }, + "keywords": [ + "email", + "parser", + "emailreplyparser", + "email_reply_parser" + ], + "devDependencies": { + "underscore": "^1.8.3" + } } diff --git a/test/email_reply_parser_test.js b/test/email_reply_parser_test.js index f4f338a..b211ace 100755 --- a/test/email_reply_parser_test.js +++ b/test/email_reply_parser_test.js @@ -5,13 +5,13 @@ var _ = require('underscore'); var EmailReplyParser = require('../lib/emailreplyparser').EmailReplyParser; function get_email(name) { - var data = fs.readFileSync(__dirname + '/emails/' + name + '.txt', 'ascii'); + var data = fs.readFileSync(__dirname + '/emails/' + name + '.txt', 'utf-8'); return EmailReplyParser.read(data); } function get_raw_email(name) { - return fs.readFileSync(__dirname + '/emails/' + name + '.txt', 'ascii'); + return fs.readFileSync(__dirname + '/emails/' + name + '.txt', 'utf-8'); } exports.test_reads_simple_body = function(test){ @@ -206,3 +206,8 @@ exports.test_parse_out_send_from_hotmail = function(test){ test.done(); } +exports.test_parse_out_send_from_french = function(test){ + body = get_raw_email('email_french'); + test.equal("On a 60% de test sur toute l'Infra", EmailReplyParser.parse_reply(body)); + test.done(); +} diff --git a/test/emails/email_french.txt b/test/emails/email_french.txt new file mode 100644 index 0000000..3ef5586 --- /dev/null +++ b/test/emails/email_french.txt @@ -0,0 +1,3 @@ +On a 60% de test sur toute l'Infra + +Le 14 oct. 2016 7:54 PM, "www.acme.com" a écrit: From 18c3b850fa0615e6b05bd480340314e9bdf9e49b Mon Sep 17 00:00:00 2001 From: Baptiste Jamin Date: Fri, 14 Oct 2016 21:37:24 +0200 Subject: [PATCH 4/4] fix french --- lib/emailreplyparser.js | 2 +- test/emails/email_french.txt | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/emailreplyparser.js b/lib/emailreplyparser.js index 9fba941..1c50665 100644 --- a/lib/emailreplyparser.js +++ b/lib/emailreplyparser.js @@ -151,7 +151,7 @@ Email.prototype = { } var frenchPatt = /^(Le\s(\n|.)*a écrit:)$/mi; - var frenchDoubleOnPatt = /^(Le\s(\n|.)*(\s)((\n|.)*)a écrit:)$/mi; + var frenchDoubleOnPatt = /^(Le\s(\n|.)*(\s)((\n|.)*)a écrit :)$/mi; if(frenchPatt.test(text) && !frenchDoubleOnPatt.test(text)) { var reply_header = (frenchPatt.exec(text))[0]; diff --git a/test/emails/email_french.txt b/test/emails/email_french.txt index 3ef5586..3031924 100644 --- a/test/emails/email_french.txt +++ b/test/emails/email_french.txt @@ -1,3 +1,4 @@ On a 60% de test sur toute l'Infra -Le 14 oct. 2016 7:54 PM, "www.acme.com" a écrit: +Le 14 oct. 2016 7:54 PM, "www.acme.com" < +session_7d0qe51e-4c4-4q24-c90f-8cbd5bf0d93f+in@mail.test.im> a écrit :