def test_extract_from_respects_content_type(extract_from_plain, extract_from_html):
    """Check that extract_from routes the body according to the content type.

    extract_from_plain / extract_from_html are presumably mock objects
    injected by patch decorators outside this view -- TODO confirm.
    """
    msg_body = "Hi there"

    # Each supported content type must reach its own extractor with the body.
    routes = (
        ("text/plain", extract_from_plain),
        ("text/html", extract_from_html),
    )
    for content_type, extractor in routes:
        quotations.extract_from(msg_body, content_type)
        extractor.assert_called_with(msg_body)

    # Any other content type is expected to give the body back untouched.
    untouched = quotations.extract_from(msg_body, "text/blah")
    eq_(msg_body, untouched)
def test_extract_from_respects_content_type(extract_from_plain, extract_from_html):
    """Verify that extract_from dispatches on its content type argument.

    The two parameters look like patched stand-ins for the plain and HTML
    extractors (they are only used through assert_called_with) -- verify
    against the decorators on this test.
    """
    msg_body = "Hi there"

    # Known content types: the matching extractor receives the body.
    for content_type, extractor in (('text/plain', extract_from_plain),
                                    ('text/html', extract_from_html)):
        quotations.extract_from(msg_body, content_type)
        extractor.assert_called_with(msg_body)

    # Unknown content type: the body comes back as-is.
    fallback = quotations.extract_from(msg_body, 'text/blah')
    eq_(msg_body, fallback)
def process(msg, filename, sender):
    """Extract the reply text of a raw email and store it next to the source file.

    Parameters:
        msg: raw email text, parsed with email.message_from_string.
        filename: path of the source file; only names ending in '.' are
            processed, anything else is skipped.
        sender: sender string written (stripped) to the companion file.

    Returns:
        The text returned by quotations.extract_from for the first payload,
        or None when the file is skipped or the mail carries no payload.

    Side effects:
        When filename ends in digits followed by '.', two files are written.
        Their names are derived from filename through change_name_body and
        change_name_sender (defined elsewhere in this file).
    """
    if not filename.endswith('.'):
        return None

    content = email.message_from_string(msg)
    if content.is_multipart():
        body = [payload.get_payload() for payload in content.get_payload()]
    else:
        body = [content.get_payload()]

    if not body:
        # discard mail without body; `body` is a list, so test for emptiness
        # rather than None, and stop here instead of failing on body[0]
        print(filename + ': body is None!')
        return None

    reply = quotations.extract_from(body[0], 'text/plain')

    if re.search(r'\d+\.$', filename) is not None:
        reply_filename = re.sub(r'\d+\.$', change_name_body, filename)
        # `with` guarantees the handle is closed even if write() raises
        with open(reply_filename, 'w') as f:
            f.write(reply.strip())

        sender_filename = re.sub(r'\d+\.$', change_name_sender, filename)
        with open(sender_filename, 'w') as f:
            f.write(sender.strip())

        print(filename)
    return reply
def post(self):
    """Handle a POST whose body is a JSON document and answer with the extracted reply.

    The request body must decode to a mapping with the keys "type" (the
    content type handed to quotations.extract_from) and "content" (the
    message text).

    Writes {"Success": True, "Content": <reply>} on success and
    {"Success": False} when decoding, key lookup or extraction fails.
    """
    json_response = {}
    try:
        input_data = json.loads(self.request.body)
        data_type = input_data["type"]
        content = input_data["content"]
        reply = quotations.extract_from(content, data_type)
    except Exception as e:
        # Handler boundary: report every failure to the client as a JSON
        # error document. `as e` / print(e) work on Python 2.6+ and 3.
        print(e)
        json_response["Success"] = False
    else:
        json_response["Success"] = True
        json_response["Content"] = reply

    # Single write, outside the try, so a failing write is not retried.
    self.write(json_response)
def get_messages(**options):
    """Post each fetched email to Slack and record the Slack timestamp.

    All keyword options are forwarded to fetch_messages. options['channel']
    names the Slack channel; a truthy options['text'] skips the lookup of
    an existing MailMessage for the Message-ID.

    For every message that should be announced the body is reduced with
    quotations.extract_from and signature.extract, converted with
    HTMLSlacker when the mimetype is "text/html", truncated and sent through
    chat.postMessage. If MailMessage rows match the References header, the
    text is posted a second time with thread_ts set. When the last API
    response is ok, its 'ts' is stored on the MailMessage for this
    subject / Message-ID.
    """
    for raw_message in fetch_messages(**options):
        channel = options['channel']
        email_message = email.message_from_bytes(raw_message)
        subject = decoded_header(email_message.get('Subject'))
        from_header = email_message.get('From')
        to_header = email_message.get('To')
        from_name = email.utils.getaddresses([from_header])[0][0]
        msgid = email_message.get('Message-ID').strip()

        should_notify = True
        try:
            if not options.get('text', ''):
                # NOTE(review): presumably a found record is truthy, which
                # turns notification off for known messages -- confirm.
                should_notify = not MailMessage.objects.get(pk=msgid)
        except MailMessage.DoesNotExist:
            pass

        if not should_notify:
            continue

        # Create a nicely-formatted version of the message
        body, mimetype = get_body(email_message)
        reply = quotations.extract_from(body, mimetype)
        text, _sig = signature.extract(reply, sender=from_header)
        if mimetype == "text/html":
            text = HTMLSlacker(text).get_output()
        slack_text = "_{}_ to _{}_\n*{}*\n\n{}".format(
            from_name, to_header, subject, text)
        slack_text = truncatewords_html(slack_text, 400)
        opts = {'channel': channel, 'text': slack_text}

        # Attempt to thread any email conversation as a Slack thread
        references = email_message.get('References', '').split()
        thread = MailMessage.objects.filter(msgid__in=references)

        sc = SlackClient(os.environ['SLACK_TOKEN'])

        # Send the message
        response = sc.api_call("chat.postMessage", **opts)
        if thread:
            # Also add it as a thread
            opts['thread_ts'] = thread.first().slackthread_ts
            response = sc.api_call("chat.postMessage", **opts)

        if response['ok']:
            record, _created = MailMessage.objects.get_or_create(
                subject=subject, msgid=msgid)
            record.slackthread_ts = response['ts']
            record.save()
def __init__(self, email_string):
    """
    Takes a raw email string and processes it into something useful.

    Populates: str, raw, recipients, sender, subject, id, date,
    content_encoding, full_body, signature and body. body ends up as the
    result of quotations.extract_from applied to the signature-free text.

    full_body falls back to an empty string when a multipart message has
    no 'text/plain' part (or the message is neither single- nor multipart),
    so the later processing steps always have an attribute to read.
    """
    self.str = email_string
    self.raw = mime.from_string(self.str)

    to = self.raw.headers['To']
    if to is None:
        self.recipients = []
    else:
        to = to.lower()
        parsed = address.parse_list(to) if ',' in to else [address.parse(to)]
        # It's possible a recipient is None if it is something like
        # 'Undisclosed recipients:;'
        self.recipients = [r for r in parsed if r is not None]

    self.sender = address.parse(self.raw.headers['From'].lower())
    self.subject = self.raw.subject
    self.id = self.raw.message_id
    self.date = parse(self.raw.headers['Date'])
    self.content_encoding = self.raw.content_encoding[0]

    # Extract plaintext body; default to '' so the attribute always exists
    self.full_body = ''
    if self.raw.content_type.is_singlepart():
        self.full_body = self.raw.body
    elif self.raw.content_type.is_multipart():
        for p in self.raw.parts:
            if p.content_type == 'text/plain':
                self.full_body = p.body
                break

    # Try to get signature
    self.body, self.signature = extract_signature(self.full_body)

    # Try ML approach if necessary
    if self.signature is None:
        self.body, self.signature = signature.extract(self.full_body,
                                                      sender=self.sender)

    # Get replies only, not the quotes
    self.body = quotations.extract_from(self.body, 'text/plain')
def process(msg, filename, sender):
    """Extract the reply text of a raw email and store it next to the source file.

    Parameters:
        msg: raw email text, parsed with email.message_from_string.
        filename: path of the source file.
        sender: sender string written (stripped) to the companion file.

    Returns:
        The text returned by quotations.extract_from for the first payload,
        or None when the mail carries no payload.

    Side effects:
        When filename ends in digits followed by '.', two files are written.
        Their names are derived from filename through change_name_reply and
        change_name_sender (defined elsewhere in this file).
    """
    content = email.message_from_string(msg)
    if content.is_multipart():
        body = [payload.get_payload() for payload in content.get_payload()]
    else:
        body = [content.get_payload()]

    if not body:
        # discard mail without body; `body` is a list, so test for emptiness
        # rather than None, and stop here instead of failing on body[0]
        print(filename + ': body is None!')
        return None

    reply = quotations.extract_from(body[0], 'text/plain')

    if re.search(r'\d+\.$', filename) is not None:
        reply_filename = re.sub(r'\d+\.$', change_name_reply, filename)
        # `with` guarantees the handle is closed even if write() raises
        with open(reply_filename, 'w') as f:
            f.write(reply.strip())

        sender_filename = re.sub(r'\d+\.$', change_name_sender, filename)
        with open(sender_filename, 'w') as f:
            f.write(sender.strip())

        print(filename)
    return reply
def test_crash_inside_extract_from():
    """extract_from must return the body it was given for 'text/plain'.

    NOTE(review): the name suggests an extractor failure is simulated by a
    patch outside this view -- confirm.
    """
    msg_body = "Hi there"
    extracted = quotations.extract_from(msg_body, 'text/plain')
    eq_(msg_body, extracted)
def _encode(obj, namespace_public_id=None, expand=False, legacy_nsid=False): """ Returns a dictionary representation of an Inbox model object obj, or None if there is no such representation defined. If the optional namespace_public_id parameter is passed, it will used instead of fetching the namespace public id for each object. This improves performance when serializing large numbers of objects, but also means that you must take care to ONLY serialize objects that belong to the given namespace! Parameters ---------- namespace_public_id: string, optional public id of the namespace to which the object to serialize belongs. Returns ------- dictionary or None """ def _get_namespace_public_id(obj): return namespace_public_id or obj.namespace.public_id def _format_participant_data(participant): """Event.participants is a JSON blob which may contain internal data. This function returns a dict with only the data we want to make public.""" dct = {} for attribute in ['name', 'status', 'email', 'comment']: dct[attribute] = participant.get(attribute) return dct def _get_lowercase_class_name(obj): return type(obj).__name__.lower() if legacy_nsid: public_id_key_name = 'namespace_id' else: public_id_key_name = 'account_id' # Flask's jsonify() doesn't handle datetimes or json arrays as primary # objects. 
if isinstance(obj, datetime.datetime): return calendar.timegm(obj.utctimetuple()) if isinstance(obj, datetime.date): return obj.isoformat() if isinstance(obj, arrow.arrow.Arrow): return encode(obj.datetime, legacy_nsid=legacy_nsid) # TODO deprecate this and remove -- legacy_nsid elif isinstance(obj, Namespace) and legacy_nsid: return { 'id': obj.public_id, 'object': 'namespace', 'namespace_id': obj.public_id, # Account specific 'account_id': obj.account.public_id, 'email_address': obj.account.email_address, 'name': obj.account.name, 'provider': obj.account.provider, 'organization_unit': obj.account.category_type } elif isinstance(obj, Namespace): # these are now "Account" objects return { 'id': obj.public_id, 'object': 'account', 'account_id': obj.public_id, 'email_address': obj.account.email_address, 'name': obj.account.name, 'provider': obj.account.provider, 'organization_unit': obj.account.category_type } elif isinstance(obj, Account) and not legacy_nsid: raise Exception("Should never be serializing accounts (legacy_nsid)") elif isinstance(obj, Account): return { 'account_id': obj.namespace.public_id, # ugh 'id': obj.namespace.public_id, # ugh 'object': 'account', 'email_address': obj.email_address, 'name': obj.name, 'organization_unit': obj.category_type, 'provider': obj.provider, # TODO add capabilities/scope (i.e. mail, contacts, cal, etc.) 
# 'status': 'syncing', # TODO what are values here # 'last_sync': 1398790077, # tuesday 4/29 } elif isinstance(obj, Message): resp = { 'id': obj.public_id, 'object': 'message', public_id_key_name: _get_namespace_public_id(obj), 'subject': obj.subject, 'from': format_address_list(obj.from_addr), 'reply_to': format_address_list(obj.reply_to), 'to': format_address_list(obj.to_addr), 'cc': format_address_list(obj.cc_addr), 'bcc': format_address_list(obj.bcc_addr), 'date': obj.received_date, 'thread_id': obj.thread.public_id, 'snippet': obj.snippet, 'body': obj.body, 'text': quotations.extract_from_html(quotations.extract_from(obj.body, 'text/html')), 'unread': not obj.is_read, 'starred': obj.is_starred, 'files': obj.api_attachment_metadata, 'events': [encode(e, legacy_nsid=legacy_nsid) for e in obj.events] } categories = format_categories(obj.categories) if obj.namespace.account.category_type == 'folder': resp['folder'] = categories[0] if categories else None else: resp['labels'] = categories # If the message is a draft (Inbox-created or otherwise): if obj.is_draft: resp['object'] = 'draft' resp['version'] = obj.version if obj.reply_to_message is not None: resp['reply_to_message_id'] = obj.reply_to_message.public_id else: resp['reply_to_message_id'] = None if expand: resp['headers'] = { 'Message-Id': obj.message_id_header, 'In-Reply-To': obj.in_reply_to, 'References': obj.references } return resp elif isinstance(obj, Thread): base = { 'id': obj.public_id, 'object': 'thread', public_id_key_name: _get_namespace_public_id(obj), 'subject': obj.subject, 'participants': format_address_list(obj.participants), 'last_message_timestamp': obj.recentdate, 'last_message_received_timestamp': obj.receivedrecentdate, 'first_message_timestamp': obj.subjectdate, 'snippet': obj.snippet, 'unread': obj.unread, 'starred': obj.starred, 'has_attachments': obj.has_attachments, 'version': obj.version, # For backwards-compatibility -- remove after deprecating tags API 'tags': obj.tags } 
categories = format_categories(obj.categories) if obj.namespace.account.category_type == 'folder': base['folders'] = categories else: base['labels'] = categories if not expand: base['message_ids'] = \ [m.public_id for m in obj.messages if not m.is_draft] base['draft_ids'] = [m.public_id for m in obj.drafts] return base # Expand messages within threads all_expanded_messages = [] all_expanded_drafts = [] for msg in obj.messages: resp = { 'id': msg.public_id, 'object': 'message', public_id_key_name: _get_namespace_public_id(msg), 'subject': msg.subject, 'from': format_address_list(msg.from_addr), 'reply_to': format_address_list(msg.reply_to), 'to': format_address_list(msg.to_addr), 'cc': format_address_list(msg.cc_addr), 'bcc': format_address_list(msg.bcc_addr), 'date': msg.received_date, 'thread_id': obj.public_id, 'snippet': msg.snippet, 'unread': not msg.is_read, 'starred': msg.is_starred, 'files': msg.api_attachment_metadata } categories = format_categories(msg.categories) if obj.namespace.account.category_type == 'folder': resp['folder'] = categories[0] if categories else None else: resp['labels'] = categories if msg.is_draft: resp['object'] = 'draft' resp['version'] = msg.version if msg.reply_to_message is not None: resp['reply_to_message_id'] = \ msg.reply_to_message.public_id else: resp['reply_to_message_id'] = None all_expanded_drafts.append(resp) else: all_expanded_messages.append(resp) base['messages'] = all_expanded_messages base['drafts'] = all_expanded_drafts return base elif isinstance(obj, Contact): return { 'id': obj.public_id, 'object': 'contact', public_id_key_name: _get_namespace_public_id(obj), 'name': obj.name, 'email': obj.email_address } elif isinstance(obj, Event): resp = { 'id': obj.public_id, 'object': 'event', public_id_key_name: _get_namespace_public_id(obj), 'calendar_id': obj.calendar.public_id if obj.calendar else None, 'message_id': obj.message.public_id if obj.message else None, 'title': obj.title, 'description': obj.description, 
'owner': obj.owner, 'participants': [_format_participant_data(participant) for participant in obj.participants], 'read_only': obj.read_only, 'location': obj.location, 'when': encode(obj.when, legacy_nsid=legacy_nsid), 'busy': obj.busy, 'status': obj.status, } if isinstance(obj, RecurringEvent): resp['recurrence'] = { 'rrule': obj.recurring, 'timezone': obj.start_timezone } if isinstance(obj, RecurringEventOverride): resp['original_start_time'] = encode(obj.original_start_time, legacy_nsid=legacy_nsid) if obj.master: resp['master_event_id'] = obj.master.public_id return resp elif isinstance(obj, Calendar): return { 'id': obj.public_id, 'object': 'calendar', public_id_key_name: _get_namespace_public_id(obj), 'name': obj.name, 'description': obj.description, 'read_only': obj.read_only, } elif isinstance(obj, When): # Get time dictionary e.g. 'start_time': x, 'end_time': y or 'date': z times = obj.get_time_dict() resp = {k: encode(v, legacy_nsid=legacy_nsid) for k, v in times.iteritems()} resp['object'] = _get_lowercase_class_name(obj) return resp elif isinstance(obj, Block): # ie: Attachments/Files resp = { 'id': obj.public_id, 'object': 'file', public_id_key_name: _get_namespace_public_id(obj), 'content_type': obj.content_type, 'size': obj.size, 'filename': obj.filename, } if len(obj.parts): # if obj is actually a message attachment (and not merely an # uploaded file), set additional properties resp.update({ 'message_ids': [p.message.public_id for p in obj.parts] }) return resp elif isinstance(obj, Category): # 'object' is set to 'folder' or 'label' resp = { 'id': obj.public_id, 'object': obj.type, public_id_key_name: _get_namespace_public_id(obj), 'name': obj.name, 'display_name': obj.api_display_name } return resp
def reply(menssagem):
    """Return menssagem passed through quotations.extract_from as 'text/html'."""
    return quotations.extract_from(menssagem, 'text/html')
def _encode(obj, namespace_public_id=None, expand=False, legacy_nsid=False): """ Returns a dictionary representation of an Inbox model object obj, or None if there is no such representation defined. If the optional namespace_public_id parameter is passed, it will used instead of fetching the namespace public id for each object. This improves performance when serializing large numbers of objects, but also means that you must take care to ONLY serialize objects that belong to the given namespace! Parameters ---------- namespace_public_id: string, optional public id of the namespace to which the object to serialize belongs. Returns ------- dictionary or None """ def _get_namespace_public_id(obj): return namespace_public_id or obj.namespace.public_id def _format_participant_data(participant): """Event.participants is a JSON blob which may contain internal data. This function returns a dict with only the data we want to make public.""" dct = {} for attribute in ["name", "status", "email", "comment"]: dct[attribute] = participant.get(attribute) return dct def _get_lowercase_class_name(obj): return type(obj).__name__.lower() if legacy_nsid: public_id_key_name = "namespace_id" else: public_id_key_name = "account_id" # Flask's jsonify() doesn't handle datetimes or json arrays as primary # objects. 
if isinstance(obj, datetime.datetime): return calendar.timegm(obj.utctimetuple()) if isinstance(obj, datetime.date): return obj.isoformat() if isinstance(obj, arrow.arrow.Arrow): return encode(obj.datetime, legacy_nsid=legacy_nsid) # TODO deprecate this and remove -- legacy_nsid elif isinstance(obj, Namespace) and legacy_nsid: return { "id": obj.public_id, "object": "namespace", "namespace_id": obj.public_id, # Account specific "account_id": obj.account.public_id, "email_address": obj.account.email_address, "name": obj.account.name, "provider": obj.account.provider, "organization_unit": obj.account.category_type, } elif isinstance(obj, Namespace): # these are now "Account" objects return { "id": obj.public_id, "object": "account", "account_id": obj.public_id, "email_address": obj.account.email_address, "name": obj.account.name, "provider": obj.account.provider, "organization_unit": obj.account.category_type, } elif isinstance(obj, Account) and not legacy_nsid: raise Exception("Should never be serializing accounts (legacy_nsid)") elif isinstance(obj, Account): return { "account_id": obj.namespace.public_id, # ugh "id": obj.namespace.public_id, # ugh "object": "account", "email_address": obj.email_address, "name": obj.name, "organization_unit": obj.category_type, "provider": obj.provider, # TODO add capabilities/scope (i.e. mail, contacts, cal, etc.) 
# 'status': 'syncing', # TODO what are values here # 'last_sync': 1398790077, # tuesday 4/29 } elif isinstance(obj, Message): resp = { "id": obj.public_id, "object": "message", public_id_key_name: _get_namespace_public_id(obj), "subject": obj.subject, "from": format_address_list(obj.from_addr), "reply_to": format_address_list(obj.reply_to), "to": format_address_list(obj.to_addr), "cc": format_address_list(obj.cc_addr), "bcc": format_address_list(obj.bcc_addr), "date": obj.received_date, "thread_id": obj.thread.public_id, "snippet": obj.snippet, "body": obj.body, "text": quotations.extract_from_html(quotations.extract_from(obj.body, "text/html")), "unread": not obj.is_read, "starred": obj.is_starred, "files": obj.api_attachment_metadata, "events": [encode(e, legacy_nsid=legacy_nsid) for e in obj.events], } categories = format_categories(obj.categories) if obj.namespace.account.category_type == "folder": resp["folder"] = categories[0] if categories else None else: resp["labels"] = categories # If the message is a draft (Inbox-created or otherwise): if obj.is_draft: resp["object"] = "draft" resp["version"] = obj.version if obj.reply_to_message is not None: resp["reply_to_message_id"] = obj.reply_to_message.public_id else: resp["reply_to_message_id"] = None if expand: resp["headers"] = { "Message-Id": obj.message_id_header, "In-Reply-To": obj.in_reply_to, "References": obj.references, } return resp elif isinstance(obj, Thread): base = { "id": obj.public_id, "object": "thread", public_id_key_name: _get_namespace_public_id(obj), "subject": obj.subject, "participants": format_address_list(obj.participants), "last_message_timestamp": obj.recentdate, "last_message_received_timestamp": obj.receivedrecentdate, "first_message_timestamp": obj.subjectdate, "snippet": obj.snippet, "unread": obj.unread, "starred": obj.starred, "has_attachments": obj.has_attachments, "version": obj.version, # For backwards-compatibility -- remove after deprecating tags API "tags": obj.tags, } 
categories = format_categories(obj.categories) if obj.namespace.account.category_type == "folder": base["folders"] = categories else: base["labels"] = categories if not expand: base["message_ids"] = [m.public_id for m in obj.messages if not m.is_draft] base["draft_ids"] = [m.public_id for m in obj.drafts] return base # Expand messages within threads all_expanded_messages = [] all_expanded_drafts = [] for msg in obj.messages: resp = { "id": msg.public_id, "object": "message", public_id_key_name: _get_namespace_public_id(msg), "subject": msg.subject, "from": format_address_list(msg.from_addr), "reply_to": format_address_list(msg.reply_to), "to": format_address_list(msg.to_addr), "cc": format_address_list(msg.cc_addr), "bcc": format_address_list(msg.bcc_addr), "date": msg.received_date, "thread_id": obj.public_id, "snippet": msg.snippet, "unread": not msg.is_read, "starred": msg.is_starred, "files": msg.api_attachment_metadata, } categories = format_categories(msg.categories) if obj.namespace.account.category_type == "folder": resp["folder"] = categories[0] if categories else None else: resp["labels"] = categories if msg.is_draft: resp["object"] = "draft" resp["version"] = msg.version if msg.reply_to_message is not None: resp["reply_to_message_id"] = msg.reply_to_message.public_id else: resp["reply_to_message_id"] = None all_expanded_drafts.append(resp) else: all_expanded_messages.append(resp) base["messages"] = all_expanded_messages base["drafts"] = all_expanded_drafts return base elif isinstance(obj, Contact): return { "id": obj.public_id, "object": "contact", public_id_key_name: _get_namespace_public_id(obj), "name": obj.name, "email": obj.email_address, } elif isinstance(obj, Event): resp = { "id": obj.public_id, "object": "event", public_id_key_name: _get_namespace_public_id(obj), "calendar_id": obj.calendar.public_id if obj.calendar else None, "message_id": obj.message.public_id if obj.message else None, "title": obj.title, "description": obj.description, 
"owner": obj.owner, "participants": [_format_participant_data(participant) for participant in obj.participants], "read_only": obj.read_only, "location": obj.location, "when": encode(obj.when, legacy_nsid=legacy_nsid), "busy": obj.busy, "status": obj.status, } if isinstance(obj, RecurringEvent): resp["recurrence"] = {"rrule": obj.recurring, "timezone": obj.start_timezone} if isinstance(obj, RecurringEventOverride): resp["original_start_time"] = encode(obj.original_start_time, legacy_nsid=legacy_nsid) if obj.master: resp["master_event_id"] = obj.master.public_id return resp elif isinstance(obj, Calendar): return { "id": obj.public_id, "object": "calendar", public_id_key_name: _get_namespace_public_id(obj), "name": obj.name, "description": obj.description, "read_only": obj.read_only, } elif isinstance(obj, When): # Get time dictionary e.g. 'start_time': x, 'end_time': y or 'date': z times = obj.get_time_dict() resp = {k: encode(v, legacy_nsid=legacy_nsid) for k, v in times.iteritems()} resp["object"] = _get_lowercase_class_name(obj) return resp elif isinstance(obj, Block): # ie: Attachments/Files resp = { "id": obj.public_id, "object": "file", public_id_key_name: _get_namespace_public_id(obj), "content_type": obj.content_type, "size": obj.size, "filename": obj.filename, } if len(obj.parts): # if obj is actually a message attachment (and not merely an # uploaded file), set additional properties resp.update({"message_ids": [p.message.public_id for p in obj.parts]}) return resp elif isinstance(obj, Category): # 'object' is set to 'folder' or 'label' resp = { "id": obj.public_id, "object": obj.type, public_id_key_name: _get_namespace_public_id(obj), "name": obj.name, "display_name": obj.api_display_name, } return resp
import base64
import sys

import talon
from talon import quotations

# Command-line filter: strips quoted text from a base64-encoded message.
#   argv[1] -- 'html' (any letter case) selects the HTML extractor; any
#              other value treats the input as plain text.
#   stdin   -- the base64 payload, possibly wrapped over several lines.
talon.init()

content_kind = sys.argv[1]

# Trailing whitespace/newlines are dropped from every line so a wrapped
# payload decodes as one base64 string.
html = base64.b64decode("".join(line.rstrip() for line in sys.stdin))

if content_kind.lower() == 'html':
    reply = quotations.extract_from_html(html)
else:
    # The content type must be 'text/plain' for the plain-text extractor
    # to be selected.
    reply = quotations.extract_from(html, 'text/plain')

print("%s" % reply)
import talon
from talon import quotations

# Minimal usage sample: initialise talon, then strip the quoted part from a
# plain-text message and print what is left.
talon.init()

# NOTE(review): the marker line normally sits on a line of its own; confirm
# the whitespace of this sample against the upstream example.
text = """Reply -----Original Message----- Quote"""

# quotations.extract_from_plain(text) is the direct plain-text entry point.
reply = quotations.extract_from(text, 'text/plain')

print(reply)