Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Yahoo!, GMail fixes #42

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
10 changes: 6 additions & 4 deletions email_reply_parser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class EmailMessage(object):
def __init__(self, text):
self.fragments = []
self.fragment = None
self.text = text.replace('\r\n', '\n')
self.text = '\n'.join(text.splitlines())
self.found_visible = False

def read(self):
Expand Down Expand Up @@ -94,18 +94,20 @@ def _scan_line(self, line):

line - a row of text from an email message
"""
is_quote_header = self.QUOTE_HDR_REGEX.match(line) is not None
stripped_line = line.strip()

is_quote_header = self.QUOTE_HDR_REGEX.match(stripped_line) is not None
is_quoted = self.QUOTED_REGEX.match(line) is not None
is_header = is_quote_header or self.HEADER_REGEX.match(line) is not None

if self.fragment and len(line.strip()) == 0:
if self.fragment and len(stripped_line) == 0:
if self.SIG_REGEX.match(self.fragment.lines[-1].strip()):
self.fragment.signature = True
self._finish_fragment()

if self.fragment \
and ((self.fragment.headers == is_header and self.fragment.quoted == is_quoted) or
(self.fragment.quoted and (is_quote_header or len(line.strip()) == 0))):
(self.fragment.quoted and (is_quote_header or len(stripped_line) == 0))):

self.fragment.lines.append(line)
else:
Expand Down
2 changes: 1 addition & 1 deletion email_reply_parser/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
VERSION = '0.5.11'
VERSION = '0.5.13'
7 changes: 7 additions & 0 deletions test/emails/email_1_10.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Base tax cost environment side. May house most director treatment call heavy.
Forward professional woman institution happen. Tell girl hope to. Wrong perhaps apply anything expert main indeed.

On Monday, April 13, 2020, 06:49:16 PM GMT+3, Paige Lee wrote:

Thank experience bag memory hundred understand of. Environmental lose probably majority peace behind. When produce ask tough.
Institution thought system class nice instead speak.
9 changes: 9 additions & 0 deletions test/emails/email_1_11.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Admit high represent movement.
Everything car rest perform late either among. Available help threat across spring necessary.
Develop line class impact pick generation. Join day design simply.

On Tue, Apr 14, 2020 at 6:13 PM Alexandru via Sailo
wrote:

Music easy though onto form top run agency. Arrive senior away total help. Foot partner between store energy out.
Water stock garden just. Skill design condition after why ten executive.
9 changes: 9 additions & 0 deletions test/emails/email_1_9.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Resource popular local capital doctor. Wish with think north shoulder stand catch. Decade many production food view only green.

Believe concern floor treatment admit keep maintain put.
On Friday, April 3, 2020, 06:05:24 PM EDT, Vicki Davis wrote:


Example myself effect understand miss idea. Tonight work home policy arm time report.

Against rest concern each hotel. Person care policy sea. Attack realize suggest save all everything scientist.
129 changes: 126 additions & 3 deletions test/test_email_reply_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,129 @@ def test_complex_body_with_one_fragment(self):

self.assertEqual(1, len(message.fragments))

def test_whitespace_before_header(self):
    """Quote header line that begins with whitespace.

    Seen in Yahoo! Mail (April 2020) with rich text reply.
    """
    parsed = self.get_email('email_1_9')
    parts = parsed.fragments

    # Three fragments are expected in total.
    self.assertEqual(3, len(parts))

    # Per-fragment flags, checked in the same order as before:
    # nothing is quoted or a signature, the middle fragment is the
    # header, and everything after the first fragment is hidden.
    expected_flags = (
        ('quoted', [False, False, False]),
        ('signature', [False, False, False]),
        ('headers', [False, True, False]),
        ('hidden', [False, True, True]),
    )
    for attr_name, expected in expected_flags:
        self.assertEqual(
            expected,
            [getattr(part, attr_name) for part in parts],
        )

    # Only the text above the header line makes up the reply.
    expected_reply = "\n".join([
        "Resource popular local capital doctor. "
        "Wish with think north shoulder stand catch. "
        "Decade many production food view only green.",
        "",
        "Believe concern floor treatment admit keep maintain put.",
    ])
    self.assertEqual(expected_reply, parsed.reply)

def test_quote_not_quoted(self):
    """Original email follows the header without any quote markers.

    Seen in Yahoo! Mail (April 2020) with plain text reply.
    """
    parsed = self.get_email('email_1_10')
    parts = parsed.fragments

    # Three fragments are expected in total.
    self.assertEqual(3, len(parts))

    # Per-fragment flags, checked in the same order as before:
    # no fragment is marked quoted or signature, the middle one is the
    # header, and the last two are hidden.
    expected_flags = (
        ('quoted', [False, False, False]),
        ('signature', [False, False, False]),
        ('headers', [False, True, False]),
        ('hidden', [False, True, True]),
    )
    for attr_name, expected in expected_flags:
        self.assertEqual(
            expected,
            [getattr(part, attr_name) for part in parts],
        )

    # The reply is just the two lines written above the header.
    expected_reply = "\n".join([
        "Base tax cost environment side. "
        "May house most director treatment call heavy.",
        "Forward professional woman institution happen. "
        "Tell girl hope to. "
        "Wrong perhaps apply anything expert main indeed.",
    ])
    self.assertEqual(expected_reply, parsed.reply)

def test_header_on_multiple_lines(self):
    """Quote header that is wrapped across more than one line.

    Seen in GMail (April 2020); the line length was 78.
    """
    parsed = self.get_email('email_1_11')
    parts = parsed.fragments

    # Three fragments are expected in total.
    self.assertEqual(3, len(parts))

    # Per-fragment flags, checked in the same order as before:
    # no fragment is marked quoted or signature, the middle one is the
    # header, and the last two are hidden.
    expected_flags = (
        ('quoted', [False, False, False]),
        ('signature', [False, False, False]),
        ('headers', [False, True, False]),
        ('hidden', [False, True, True]),
    )
    for attr_name, expected in expected_flags:
        self.assertEqual(
            expected,
            [getattr(part, attr_name) for part in parts],
        )

    # The reply is the three lines written above the wrapped header.
    expected_reply = "\n".join([
        "Admit high represent movement.",
        "Everything car rest perform late either among. "
        "Available help threat across spring necessary.",
        "Develop line class impact pick generation. "
        "Join day design simply.",
    ])
    self.assertEqual(expected_reply, parsed.reply)

def test_verify_reads_signature_correct(self):
message = self.get_email('correct_sig')
self.assertEqual(2, len(message.fragments))
Expand Down Expand Up @@ -166,17 +289,17 @@ def test_multiple_on(self):
self.assertTrue(re.match('^On 9 Jan 2014', message.fragments[1].content))

self.assertEqual(
[False, True, False],
[False, True],
[fragment.quoted for fragment in message.fragments]
)

self.assertEqual(
[False, False, False],
[False, False],
[fragment.signature for fragment in message.fragments]
)

self.assertEqual(
[False, True, True],
[False, True],
[fragment.hidden for fragment in message.fragments]
)

Expand Down