Пример #1
0
def test_long_line_in_signature():
    """Check that an over-long line is not pulled into the signature.

    First scenario: only the trailing ``Bob`` line is expected as the
    signature, everything above it stays in the text.
    Second scenario: a message ending in a long line is expected to yield no
    signature at all.
    """
    sender = "*****@*****.**"
    body = """Call me ASAP, please.This is about the last changes you deployed.

Thanks in advance,
some long text here which doesn't seem to be a signature at all
Bob"""

    text, extracted_signature = signature.extract(body, sender)
    eq_('\n'.join(body.splitlines()[:-1]), text)
    eq_('Bob', extracted_signature)

    body = """Thanks David,

    some *long* text here which doesn't seem to be a signature at all
    """
    # This scenario used to build an (expected, actual) tuple and throw it
    # away, so nothing was verified. Assert for real; the text is compared
    # modulo surrounding whitespace, as the dataset-driven tests do.
    text, extracted_signature = signature.extract(body, "*****@*****.**")
    eq_(body.strip(), text.strip())
    eq_(None, extracted_signature)
Пример #2
0
def test_message_shorter_SIGNATURE_MAX_LINES():
    """A four-line message: first two lines are text, last two the signature."""
    sender = "*****@*****.**"
    body = """Call me ASAP, please.This is about the last changes you deployed.

Thanks in advance,
Bob"""
    body_lines = body.splitlines()
    expected_text = '\n'.join(body_lines[:2])
    expected_signature = '\n'.join(body_lines[-2:])

    text, extracted_signature = signature.extract(body, sender)

    eq_(expected_text, text)
    eq_(expected_signature, extracted_signature)
Пример #3
0
def test_over_2_text_lines_after_signature():
    """No signature is expected when the sender's name is followed by text lines."""
    message = """Blah

    Bob,
    If there are more than
    2 non signature lines in the end
    It's not signature
    """
    # Only the signature half of the result is checked here.
    _text, found_signature = signature.extract(message, "Bob")
    eq_(found_signature, None)
Пример #4
0
def test_text_line_in_signature():
    """The signature is expected to be one solid part: the last three lines."""
    sender = "*****@*****.**"
    body = """Call me ASAP, please.This is about the last changes you deployed.

Thanks in advance,
some text which doesn't seem to be a signature at all
Bob"""
    body_lines = body.splitlines()
    expected_text = '\n'.join(body_lines[:2])
    expected_signature = '\n'.join(body_lines[-3:])

    text, extracted_signature = signature.extract(body, sender)

    eq_(expected_text, text)
    eq_(expected_signature, extracted_signature)
Пример #5
0
def test_messages_longer_SIGNATURE_MAX_LINES():
    """Run extraction over every ``*_body`` file found in ``STRIPPED``.

    Each body file is paired with a sibling file whose name ends in
    ``signature`` instead of ``body``; the extracted signature and remaining
    text are compared with it after stripping surrounding whitespace.
    """
    for entry in os.listdir(STRIPPED):
        body_path = os.path.join(STRIPPED, entry)
        if body_path.endswith('_body'):
            sender, body = dataset.parse_msg_sender(body_path)
            text, extracted_signature = signature.extract(body, sender)
            if not extracted_signature:
                # Normalise "nothing found" so .strip() below is always valid
                extracted_signature = ''

            signature_path = body_path[:-len('body')] + 'signature'
            with open(signature_path) as ms:
                msg_signature = ms.read()

            eq_(msg_signature.strip(), extracted_signature.strip())
            # Expected text: the stripped body minus the signature's length
            trimmed_body = body.strip()
            stripped_msg = trimmed_body[:len(trimmed_body) - len(msg_signature)]
            eq_(stripped_msg.strip(), text.strip())
Пример #6
0
def test_messages_longer_SIGNATURE_MAX_LINES():
    """Run extraction over every ``*_body`` file found in ``STRIPPED``.

    The sibling ``*_signature`` file is opened with ``encoding="utf8"`` when
    running on Python 3; on Python 2 ``open`` is called without extra
    arguments.
    """
    import sys
    kwargs = {"encoding": "utf8"} if sys.version_info > (3, 0) else {}

    for entry in os.listdir(STRIPPED):
        body_path = os.path.join(STRIPPED, entry)
        if body_path.endswith('_body'):
            sender, body = dataset.parse_msg_sender(body_path)
            text, extracted_signature = extract(body, sender)
            if not extracted_signature:
                # Normalise "nothing found" so .strip() below is always valid
                extracted_signature = ''

            signature_path = body_path[:-len('body')] + 'signature'
            with open(signature_path, **kwargs) as ms:
                msg_signature = ms.read()

            eq_(msg_signature.strip(), extracted_signature.strip())
            # Expected text: the stripped body minus the signature's length
            trimmed_body = body.strip()
            stripped_msg = trimmed_body[:len(trimmed_body) - len(msg_signature)]
            eq_(stripped_msg.strip(), text.strip())
Пример #7
0
    def __init__(self, email_string):
        """
        Takes a raw email string and processes it into something useful.

        Populates ``str``, ``raw``, ``recipients``, ``sender``, ``subject``,
        ``id``, ``date``, ``content_encoding``, ``full_body``, ``body`` and
        ``signature`` on the instance.

        :param email_string: the raw email, as accepted by ``mime.from_string``
        """
        self.str = email_string
        self.raw = mime.from_string(self.str)

        to = self.raw.headers['To']
        if to is None:
            self.recipients = []
        else:
            to = to.lower()
            self.recipients = address.parse_list(to) if ',' in to else [address.parse(to)]

        # It's possible a recipient is None if it is something like
        # 'Undisclosed recipients:;'
        self.recipients = [r for r in self.recipients if r is not None]
        # NOTE(review): a message without a From header would fail on .lower()
        # here if the lookup yields None, as it can for 'To' — confirm.
        self.sender = address.parse(self.raw.headers['From'].lower())

        self.subject = self.raw.subject
        self.id = self.raw.message_id
        self.date = parse(self.raw.headers['Date'])
        self.content_encoding = self.raw.content_encoding[0]

        # Extract plaintext body. Start from an empty body so that a multipart
        # message without a text/plain part (or a content type that is neither
        # single- nor multipart) no longer leaves ``full_body`` unset, which
        # made the signature extraction below raise AttributeError.
        self.full_body = ''
        if self.raw.content_type.is_singlepart():
            self.full_body = self.raw.body
        elif self.raw.content_type.is_multipart():
            for p in self.raw.parts:
                if p.content_type == 'text/plain':
                    self.full_body = p.body
                    break

        # Try to get signature
        self.body, self.signature = extract_signature(self.full_body)

        # Try ML approach if necessary
        if self.signature is None:
            self.body, self.signature = signature.extract(self.full_body, sender=self.sender)

        # Get replies only, not the quotes
        self.body = quotations.extract_from(self.body, 'text/plain')
Пример #8
0
def test_capitalized():
    """Only the trailing three-line contact block is expected as the signature."""
    msg_body = """Hi Mary,

Do you still need a DJ for your wedding? I've included a video demo of one of our DJs available for your wedding date.

DJ Doe 
http://example.com
Password: SUPERPASSWORD

Would you like to check out more?


At your service,

John Smith
Doe Inc
555-531-7967"""

    expected_signature = """John Smith
Doe Inc
555-531-7967"""

    result = extract(msg_body, 'Doe')
    # Index 1 of the result is the extracted signature
    eq_(expected_signature, result[1])
Пример #9
0
def test_signature_extract_crash(has_signature):
    """With ``has_signature`` raising, the body is expected back with no signature."""
    # presumably has_signature is a mock injected by a decorator outside this view
    has_signature.side_effect = Exception("Bam!")
    msg_body = u"Blah\r\n--\r\n\r\nСергей"
    expected = (msg_body, None)
    eq_(expected, extract(msg_body, "Сергей"))
Пример #10
0
def test_signature_extract_crash(has_signature):
    """With ``has_signature`` raising, the body is expected back with no signature."""
    # presumably has_signature is a mock injected by a decorator outside this view
    has_signature.side_effect = Exception('Bam!')
    msg_body = 'Blah\r\n--\r\n\r\nСергей'
    expected = (msg_body, None)
    eq_(expected, signature.extract(msg_body, 'Сергей'))
Пример #11
0
import talon
# don't forget to init the library first
# it loads machine learning classifiers
talon.init()

from talon import signature

message = """Annette Anderson,
11833 Spring Laurel DR
Charlotte, NC 28215
704-724-4697
 
Client is a referral from our chiro- she is there now about to have her appointment. Please reach out in the next hour!
 
Respectfully,

Lacie N. Johnson
Intake Paralegal 
Law Offices of Shane Smith, PC
263 Hwy 74 N
Peachtree City, GA 30269

770.487.8999 ext. 42
770.631.7667 fax
"""

# Keep the extracted signature under its own name so the imported
# `signature` module is not rebound to a string (or None).
text, extracted_signature = signature.extract(message, sender='*****@*****.**')
# The call form is valid on Python 3 and prints the same on Python 2 for a
# single argument; the bare `print text` statement is a Python 3 SyntaxError.
print(text)
Пример #12
0
 def signature(self, sender, message):
     """Extract and return the signature of ``message``, logging sender and result."""
     logger.info('message from %s', sender)
     # Only the signature half of the (text, signature) pair is returned
     _body, found = signature.extract(message, sender)
     logger.info('extracted %s', found)
     return found
Пример #13
0
def test_basic():
    """A ``--`` delimited block is expected to be split off as the signature."""
    msg_body = 'Blah\r\n--\r\n\r\nSergey Obukhov'
    expected = ('Blah', '--\r\n\r\nSergey Obukhov')
    eq_(expected, signature.extract(msg_body, 'Sergey'))
Пример #14
0
def test_no_signature():
    """A one-word body is expected back unchanged, with ``None`` as signature."""
    sender = "*****@*****.**"
    body = "Hello"
    expected = (body, None)
    eq_(expected, extract(body, sender))
Пример #15
0
def receive_email(request):
    """Handle an inbound e-mail webhook request.

    Non-POST requests are simply acknowledged. For a POST, the signature is
    removed from the ``body-plain`` field and the remaining text becomes
    either a ``Comment`` (recipient contains ``'newpost'``) or a new ``Case``,
    optionally carrying an uploaded file. After a ``Case`` is saved the
    follow-up helpers ``return_email_info`` and ``find_rel_questions_email``
    are called.

    Returns ``HttpResponse('OK')`` on every path that does not raise.
    """
    if request.method != 'POST':
        # Returned text is ignored but HTTP status code matters:
        # Mailgun wants to see 2xx, otherwise it will make another attempt in 5 minutes
        return HttpResponse('OK')

    talon.init()
    from talon import signature

    form = request.POST
    sender = form.get('sender')
    recipient = form.get('recipient')
    subject = form.get('subject', '')
    body_plain = form.get('body-plain', '')
    text, _extracted_signature = signature.extract(body_plain, sender=sender)
    body_without_quotes = form.get('stripped-text', '')  # read but not used below
    user = get_object_or_404(User.objects.prefetch_related(), email=sender)
    username = user.username
    synonyms = nltk_rel_words_email(subject + " " + text)
    print("Synonyms = ", synonyms)

    # A reply to a post creates a comment on that post
    if 'newpost' in recipient:
        print('found newhost')
        # NOTE(review): re.findall returns a list, and that list is what gets
        # passed as ``case`` below — confirm the model field accepts it.
        post_id = re.findall('([0-9]+)', recipient)
        print("Add comment to post ID ", post_id)
        print("Sender name ", user.id)
        try:
            new_comment = Comment(
                case=post_id,
                comment=text,
                commented_by=user,
            )
            new_comment.save()
            print("Success!")
            return HttpResponse('OK')
        except Exception as e:
            # Failures are only printed; the request is still acknowledged
            print(e)
        return HttpResponse('OK')

    if request.FILES:
        # NOTE(review): each iteration rebinds ``to_save``, so with several
        # uploads only the last one reaches save() — confirm this is intended.
        for key in request.FILES:
            upload = request.FILES[key]
            attachment_name = 'attachment'
            to_save = Case(
                state=user.state,
                county=user.county,
                title=subject,
                issue_detail=text,
                created_by=str(username),
                issue_area_id=determine_area(recipient),
                related_document=upload,
                related_document_name=attachment_name,
            )
    else:
        to_save = Case(
            state=user.state,
            county=user.county,
            title=subject,
            issue_detail=text,
            created_by=str(username),
            issue_area_id=determine_area(recipient),
        )

    to_save.save()
    print(to_save.id)

    return_email_info(sender, recipient, subject, to_save.id)

    find_rel_questions_email(
        synonyms,
        determine_area(recipient),
        user.county,
        to_save.id,
    )

    # Returned text is ignored but HTTP status code matters:
    # Mailgun wants to see 2xx, otherwise it will make another attempt in 5 minutes
    return HttpResponse('OK')
Пример #16
0
 def signature(self, sender, message):
     """Extract and return the signature of ``message``, logging sender and result."""
     logger.info('message from %s', sender)
     # Only the signature half of the (text, signature) pair is returned
     _body, found = signature.extract(message, sender)
     logger.info('extracted %s', found)
     return found
Пример #17
0
def remove_signature(message, sender):
    """Return the text part of ``signature.extract(message, sender)``."""
    # The extracted signature itself is discarded
    remaining_text, _ = signature.extract(message, sender)
    return remaining_text
Пример #18
0
def test_no_signature():
    """A one-word body is expected back unchanged, with ``None`` as signature."""
    sender = "*****@*****.**"
    body = "Hello"
    expected = (body, None)
    eq_(expected, signature.extract(body, sender))
Пример #19
0
def test_handles_unicode():
    """Smoke test: extraction on the ``UNICODE_MSG`` fixture must not raise."""
    sender, body = dataset.parse_msg_sender(UNICODE_MSG)
    # No assertions: the result is unpacked only to confirm it is a pair
    _text, _signature = extract(body, sender)
Пример #20
0
def test_basic():
    """Currently skipped: ``SkipTest`` is raised before any assertion runs."""
    raise SkipTest()

    # Unreachable while the skip above is in place; kept for re-enabling later.
    msg_body = "Blah\r\n--\r\n\r\nSergey Obukhov"
    expected = ("Blah", "--\r\n\r\nSergey Obukhov")
    eq_(expected, extract(msg_body, "Sergey"))
Пример #21
0
def test_signature_extract_crash(has_signature):
    """With ``has_signature`` raising, the body is expected back with no signature."""
    # presumably has_signature is a mock injected by a decorator outside this view
    has_signature.side_effect = Exception('Bam!')
    msg_body = u'Blah\r\n--\r\n\r\nСергей'
    expected = (msg_body, None)
    eq_(expected, extract(msg_body, 'Сергей'))
Пример #22
0
def test_basic():
    """A ``--`` delimited block is expected to be split off as the signature."""
    msg_body = 'Blah\r\n--\r\n\r\nSergey Obukhov'
    expected = ('Blah', '--\r\n\r\nSergey Obukhov')
    eq_(expected, extract(msg_body, 'Sergey'))
Пример #23
0
def test_handles_unicode():
    """Smoke test: extraction on the ``UNICODE_MSG`` fixture must not raise."""
    sender, body = dataset.parse_msg_sender(UNICODE_MSG)
    # No assertions: the result is unpacked only to confirm it is a pair
    _text, _signature = signature.extract(body, sender)
def extractSignature_MachineLearning(message, sender):
    """Return ``(body, signature)`` as produced by ``signature.extract``."""
    remaining_text, found_signature = signature.extract(message, sender=sender)
    return (remaining_text, found_signature)