def test_long_line_in_signature():
    """Run signature.extract on bodies that contain a long, non-signature-looking line.

    The first body ends with the name 'Bob'; the test expects 'Bob' to be
    returned as the signature and every line except the last as the text.
    """
    sender = "*****@*****.**"
    # NOTE(review): the assertions below index individual lines of `body`, but
    # the literal as it appears here is a single line -- line breaks may have
    # been lost; confirm against the intended fixture.
    body = """Call me ASAP, please.This is about the last changes you deployed. Thanks in advance, some long text here which doesn't seem to be a signature at all Bob"""

    text, extracted_signature = signature.extract(body, sender)
    # Text is expected to be the body minus its last line.
    eq_('\n'.join(body.splitlines()[:-1]), text)
    eq_('Bob', extracted_signature)

    body = """Thanks David, some *long* text here which doesn't seem to be a signature at all """
    # NOTE(review): this statement only builds a tuple and discards it, so
    # nothing is asserted for the second body. It looks like an `eq_(...)`
    # call was intended -- confirm the expected result before enabling it.
    ((body, None), signature.extract(body, "*****@*****.**"))
def test_message_shorter_SIGNATURE_MAX_LINES():
    """Check the text/signature split for a short message signed 'Bob'.

    The expected text is the first two lines of the body and the expected
    signature is its last two lines.
    """
    sender = "*****@*****.**"
    # NOTE(review): the expectations slice `body` by line, yet the literal as
    # it appears here is a single line -- confirm the intended line breaks.
    body = """Call me ASAP, please.This is about the last changes you deployed. Thanks in advance, Bob"""

    body_lines = body.splitlines()
    expected_text = '\n'.join(body_lines[:2])
    expected_signature = '\n'.join(body_lines[-2:])

    text, extracted_signature = signature.extract(body, sender)

    eq_(expected_text, text)
    eq_(expected_signature, extracted_signature)
def test_over_2_text_lines_after_signature():
    """Expect no signature when ordinary text follows the sender's name."""
    body = """Blah Bob, If there are more than 2 non signature lines in the end It's not signature """

    result = signature.extract(body, "Bob")
    # Only the signature half of the (text, signature) pair is checked.
    _text, found_signature = result

    eq_(found_signature, None)
def test_text_line_in_signature():
    """Check that a plain text line inside the closing block stays in the signature.

    The signature is expected to be one solid part: the last three lines of
    the body, with the first two lines returned as the text.
    """
    sender = "*****@*****.**"
    # NOTE(review): the expectations slice `body` by line, yet the literal as
    # it appears here is a single line -- confirm the intended line breaks.
    body = """Call me ASAP, please.This is about the last changes you deployed. Thanks in advance, some text which doesn't seem to be a signature at all Bob"""

    body_lines = body.splitlines()
    expected_text = '\n'.join(body_lines[:2])
    expected_signature = '\n'.join(body_lines[-3:])

    text, extracted_signature = signature.extract(body, sender)

    eq_(expected_text, text)
    eq_(expected_signature, extracted_signature)
def test_messages_longer_SIGNATURE_MAX_LINES():
    """Compare extracted signatures with the stored fixtures in STRIPPED.

    For every file in STRIPPED whose name ends with '_body', the sender and
    body are parsed, signature.extract is run, and the result is compared
    with the sibling '<prefix>_signature' file. The remaining text is compared
    with the body minus the stored signature.
    """
    import sys

    # Read the expected-signature fixtures as UTF-8 on Python 3 instead of
    # relying on the platform default encoding. Python 2's built-in open()
    # has no `encoding` argument, so it is only passed on Python 3.
    open_kwargs = {}
    if sys.version_info > (3, 0):
        open_kwargs["encoding"] = "utf8"

    for filename in os.listdir(STRIPPED):
        filename = os.path.join(STRIPPED, filename)
        if not filename.endswith('_body'):
            continue

        sender, body = dataset.parse_msg_sender(filename)
        text, extracted_signature = signature.extract(body, sender)
        # A missing signature (None) is compared as an empty string.
        extracted_signature = extracted_signature or ''

        # '<prefix>_body' -> '<prefix>_signature'
        signature_path = filename[:-len('body')] + 'signature'
        with open(signature_path, **open_kwargs) as ms:
            msg_signature = ms.read()

        eq_(msg_signature.strip(), extracted_signature.strip())
        stripped_msg = body.strip()[:len(body.strip()) - len(msg_signature)]
        eq_(stripped_msg.strip(), text.strip())
def test_messages_longer_SIGNATURE_MAX_LINES():
    """Compare extracted signatures with the stored fixtures in STRIPPED.

    Each '*_body' file in STRIPPED is parsed into sender and body, passed to
    extract, and checked against the matching '*_signature' file. The
    remaining text is checked against the body minus the stored signature.
    """
    import sys

    # Python 3's open() takes an explicit encoding; Python 2's does not.
    open_kwargs = {"encoding": "utf8"} if sys.version_info > (3, 0) else {}

    for entry in os.listdir(STRIPPED):
        body_path = os.path.join(STRIPPED, entry)
        if not body_path.endswith('_body'):
            continue

        sender, body = dataset.parse_msg_sender(body_path)
        text, extracted_signature = extract(body, sender)
        # A missing signature (None) is compared as an empty string.
        if not extracted_signature:
            extracted_signature = ''

        # '<prefix>_body' -> '<prefix>_signature'
        signature_path = body_path[:-len('body')] + 'signature'
        with open(signature_path, **open_kwargs) as sig_file:
            msg_signature = sig_file.read()

        eq_(msg_signature.strip(), extracted_signature.strip())

        trimmed_body = body.strip()
        stripped_msg = trimmed_body[:len(trimmed_body) - len(msg_signature)]
        eq_(stripped_msg.strip(), text.strip())
def __init__(self, email_string):
    """
    Parse a raw email string and expose its headers, body, signature and reply.

    Sets: str, raw, recipients, sender, subject, id, date, content_encoding,
    full_body (plaintext body), signature, and body (text with the signature
    removed and then passed through quotations.extract_from).
    """
    self.str = email_string
    self.raw = mime.from_string(self.str)

    # Recipients: lower-cased 'To' header, parsed as a list when it contains
    # a comma and as a single address otherwise.
    to_header = self.raw.headers['To']
    if to_header is None:
        self.recipients = []
    else:
        to_header = to_header.lower()
        if ',' in to_header:
            parsed = address.parse_list(to_header)
        else:
            parsed = [address.parse(to_header)]
        # A recipient can be None for something like
        # 'Undisclosed recipients:;' -- drop those entries.
        self.recipients = [addr for addr in parsed if addr is not None]

    self.sender = address.parse(self.raw.headers['From'].lower())
    self.subject = self.raw.subject
    self.id = self.raw.message_id
    self.date = parse(self.raw.headers['Date'])
    self.content_encoding = self.raw.content_encoding[0]

    # Plaintext body: the whole body for single-part messages, otherwise the
    # first part whose content type equals 'text/plain'.
    # NOTE(review): full_body stays unset when neither branch assigns it
    # (e.g. a multipart message without a text/plain part) -- confirm callers.
    if self.raw.content_type.is_singlepart():
        self.full_body = self.raw.body
    elif self.raw.content_type.is_multipart():
        for part in self.raw.parts:
            if not (part.content_type == 'text/plain'):
                continue
            self.full_body = part.body
            break

    # First attempt at signature extraction; fall back to the
    # sender-aware signature.extract when nothing was found.
    self.body, self.signature = extract_signature(self.full_body)
    if self.signature is None:
        self.body, self.signature = signature.extract(
            self.full_body, sender=self.sender)

    # Keep the reply only, not the quoted text.
    self.body = quotations.extract_from(self.body, 'text/plain')
def test_capitalized():
    """Check the signature extracted for sender 'Doe' from a message that
    mentions 'Doe' several times before the closing block."""
    msg_body = """Hi Mary, Do you still need a DJ for your wedding? I've included a video demo of one of our DJs available for your wedding date. DJ Doe http://example.com Password: SUPERPASSWORD Would you like to check out more? At your service, John Smith Doe Inc 555-531-7967"""
    expected_signature = """John Smith Doe Inc 555-531-7967"""

    result = extract(msg_body, 'Doe')

    # Element 1 of the result is the signature.
    eq_(expected_signature, result[1])
def test_signature_extract_crash(has_signature):
    """Expect extract to return (body, None) when has_signature raises.

    `has_signature` is presumably a mock injected by a patch decorator that
    is not visible here.
    """
    has_signature.side_effect = Exception("Bam!")
    msg_body = u"Blah\r\n--\r\n\r\nСергей"

    expected = (msg_body, None)
    actual = extract(msg_body, "Сергей")

    eq_(expected, actual)
def test_signature_extract_crash(has_signature):
    """Expect signature.extract to return (body, None) when has_signature raises.

    `has_signature` is presumably a mock injected by a patch decorator that
    is not visible here.
    """
    has_signature.side_effect = Exception('Bam!')
    msg_body = 'Blah\r\n--\r\n\r\nСергей'

    expected = (msg_body, None)
    actual = signature.extract(msg_body, 'Сергей')

    eq_(expected, actual)
import talon
# don't forget to init the library first
# it loads machine learning classifiers
talon.init()

from talon import signature

# Sample message whose signature block should be separated from the text.
message = """Annette Anderson, 11833 Spring Laurel DR Charlotte, NC 28215 704-724-4697 Client is a referral from our chiro- she is there now about to have her appointment. Please reach out in the next hour! Respectfully, Lacie N. Johnson Intake Paralegal Law Offices of Shane Smith, PC 263 Hwy 74 N Peachtree City, GA 30269 770.487.8999 ext. 42 770.631.7667 fax """

# Unpack into `extracted_signature` rather than `signature`: rebinding the
# module-level name `signature` would replace the imported talon module and
# break any later `signature.extract(...)` call.
text, extracted_signature = signature.extract(message, sender='*****@*****.**')

# print() with one argument behaves the same on Python 2 and Python 3; the
# bare `print text` statement is a syntax error on Python 3.
print(text)
def signature(self, sender, message):
    """Return the signature extracted from `message` for `sender`.

    Logs the sender before extraction and the extracted signature after it.
    Inside the body, `signature` is looked up as a global name, so it refers
    to whatever `signature` is bound at module level rather than this function
    if this is a method.
    """
    logger.info('message from %s', sender)

    extraction = signature.extract(message, sender)
    # The text half of the (text, signature) pair is not used here.
    _text, found_signature = extraction

    logger.info('extracted %s', found_signature)
    return found_signature
def test_basic():
    """Expect a '--' delimited block to be split off as the signature."""
    msg_body = 'Blah\r\n--\r\n\r\nSergey Obukhov'

    expected = ('Blah', '--\r\n\r\nSergey Obukhov')
    actual = signature.extract(msg_body, 'Sergey')

    eq_(expected, actual)
def test_no_signature():
    """Expect (body, None) for a message that has no signature."""
    sender = "*****@*****.**"
    body = "Hello"

    expected = (body, None)
    actual = extract(body, sender)

    eq_(expected, actual)
def receive_email(request):
    """Handle an inbound-email POST and store it as a Comment or a Case.

    For a POST request the sender, recipient, subject and plain body are read
    from ``request.POST``, the signature is stripped from the body with
    ``signature.extract``, and the sender is looked up as a ``User`` by email.
    If the recipient contains 'newpost' a ``Comment`` is saved; otherwise a
    ``Case`` is saved (with the uploaded file when ``request.FILES`` is
    non-empty) and the follow-up helpers are called.

    Returns ``HttpResponse('OK')``.

    NOTE(review): the source of this function was flattened onto one line;
    the block nesting below is a reconstruction -- verify it against the
    intended control flow.
    """
    if request.method == 'POST':
        # NOTE(review): talon.init() runs on every POST request; presumably it
        # could be done once at startup -- confirm.
        talon.init()
        from talon import signature
        sender = request.POST.get('sender')
        recipient = request.POST.get('recipient')
        subject = request.POST.get('subject', '')
        body_plain = request.POST.get('body-plain', '')
        # After this line the local name `signature` holds the extracted
        # signature, no longer the talon module.
        text, signature = signature.extract(body_plain, sender=sender)
        # NOTE(review): body_without_quotes is not used anywhere below.
        body_without_quotes = request.POST.get('stripped-text', '')
        sender_name = get_object_or_404(User.objects.prefetch_related(), email=sender)
        raw_sender_name = sender_name.username
        synonyms = nltk_rel_words_email(subject + " " + text)
        print("Synonyms = ", synonyms)
        ## reply to post creates comment to that post
        # NOTE(review): `recipient` has no default above, so this `in` test
        # raises TypeError when the field is missing -- confirm it is always sent.
        if 'newpost' in recipient:
            print('found newhost')
            # NOTE(review): re.findall returns a list of matches, and that
            # list is passed as `case=` below -- confirm this is intended.
            post_id = re.findall('([0-9]+)', recipient)
            print("Add comment to post ID ", post_id)
            print("Sender name ", sender_name.id)
            try:
                to_save_comment = Comment(
                    case=post_id,
                    comment=text,
                    commented_by=sender_name,
                )
                to_save_comment.save()
                print("Success!")
                return HttpResponse('OK')
            except Exception as e:
                # Failures are only printed; execution then continues to the
                # final return below.
                print(e)
        else:
            if request.FILES:
                for key in request.FILES:
                    file = request.FILES[key]
                    # attachment_name = request.POST.get('attachment')
                    attachment_name = 'attachment'
                    to_save = Case(state=sender_name.state, county=sender_name.county, title=subject, issue_detail=text, created_by=str(raw_sender_name), issue_area_id=determine_area(recipient), related_document=file, related_document_name=attachment_name)
            else:
                to_save = Case(
                    state=sender_name.state,
                    county=sender_name.county,
                    title=subject,
                    issue_detail=text,
                    created_by=str(raw_sender_name),
                    issue_area_id=determine_area(recipient),
                )
            to_save.save()
            print(to_save.id)
            return_email_info(sender, recipient, subject, to_save.id)
            find_rel_questions_email(synonyms, determine_area(recipient), sender_name.county, to_save.id)
        # attachments:
        # for key in request.FILES:
        #     file = request.FILES[key]
        #     do something with the file
        #
    # Returned text is ignored but HTTP status code matters:
    # Mailgun wants to see 2xx, otherwise it will make another attempt in 5 minutes
    return HttpResponse('OK')
def remove_signature(message, sender):
    """Return the text part of `message` as split by signature.extract.

    The signature half of the (text, signature) result is discarded.
    """
    body_text, _discarded_signature = signature.extract(message, sender)
    return body_text
def test_no_signature():
    """Expect (body, None) for a message that has no signature."""
    sender = "*****@*****.**"
    body = "Hello"

    expected = (body, None)
    actual = signature.extract(body, sender)

    eq_(expected, actual)
def test_handles_unicode():
    """Smoke test: extract must run on the UNICODE_MSG fixture without raising.

    Nothing is asserted about the returned values.
    """
    parsed = dataset.parse_msg_sender(UNICODE_MSG)
    sender, body = parsed

    # Unpacking also checks that a two-element result comes back.
    text, extracted_signature = extract(body, sender)
def test_basic():
    """Currently skipped: a '--' delimited block should be split off as the signature."""
    # The test is disabled by raising SkipTest; the code below is kept for
    # when it is re-enabled and is not executed.
    raise SkipTest()

    msg_body = "Blah\r\n--\r\n\r\nSergey Obukhov"
    expected = ("Blah", "--\r\n\r\nSergey Obukhov")
    actual = extract(msg_body, "Sergey")
    eq_(expected, actual)
def test_signature_extract_crash(has_signature):
    """Expect extract to return (body, None) when has_signature raises.

    `has_signature` is presumably a mock injected by a patch decorator that
    is not visible here.
    """
    has_signature.side_effect = Exception('Bam!')
    msg_body = u'Blah\r\n--\r\n\r\nСергей'

    expected = (msg_body, None)
    actual = extract(msg_body, 'Сергей')

    eq_(expected, actual)
def test_basic():
    """Expect a '--' delimited block to be split off as the signature."""
    msg_body = 'Blah\r\n--\r\n\r\nSergey Obukhov'

    expected = ('Blah', '--\r\n\r\nSergey Obukhov')
    actual = extract(msg_body, 'Sergey')

    eq_(expected, actual)
def test_handles_unicode():
    """Smoke test: signature.extract must run on the UNICODE_MSG fixture without raising.

    Nothing is asserted about the returned values.
    """
    parsed = dataset.parse_msg_sender(UNICODE_MSG)
    sender, body = parsed

    # Unpacking also checks that a two-element result comes back.
    text, extracted_signature = signature.extract(body, sender)
def extractSignature_MachineLearning(message, sender):
    """Split `message` into body and signature with signature.extract.

    Returns a (body, signature) tuple; `sender` is passed through as the
    `sender` keyword argument.
    """
    text_part, signature_part = signature.extract(message, sender=sender)
    return (text_part, signature_part)