def process_edit_read(self):
    """ Process when an edit box is read from the server.

    Collects the To/CC/Bcc/Subject fields and the plain text message
    from the parsed page. When at least one of them is present the
    record is tagged as 'Edit Read' and passed to insert_message(),
    whose result is returned. Otherwise nothing is stored and None is
    returned.
    """
    document = self.parser.root

    ## (result key, tag name, element id) for each header field
    header_fields = (
        ('To', 'textarea', 'tofield'),
        ('CC', 'textarea', 'ccfield'),
        ('Bcc', 'textarea', 'bccfield'),
        ('Subject', 'input', 'subjectfield'),
    )

    fields = {}
    for key, tag_name, element_id in header_fields:
        element = document.find(tag_name, {'id': element_id})
        if not element:
            continue

        try:
            fields[key] = HTML.decode_entity(element.children[0])
        except IndexError:
            ## Nothing usable inside the element - leave the field out
            pass

    ## Find the message:
    plain = document.find('input', {'name': 'PlainMsg'})
    if plain:
        body = HTML.decode_entity(plain['value'])
        if body:
            fields['message'] = body

    ## Nothing was found on this page, so there is nothing to store
    if not fields:
        return

    fields['type'] = 'Edit Read'
    if self.username:
        fields['From'] = self.username

    return self.insert_message(fields, inode_template="y%s")
def process_edit_read(self):
    """ Process when an edit box is read from the server.

    Gathers the address and subject fields plus the plain text body
    from the parsed page. If anything was gathered, the record is
    marked as 'Edit Read' and given to insert_message(); the value
    that call returns is returned. If nothing was gathered, None is
    returned and nothing is stored.
    """
    page = self.parser.root
    record = {}

    ## Each entry is (result key, tag name, element id)
    wanted = [
        ('To', 'textarea', 'tofield'),
        ('CC', 'textarea', 'ccfield'),
        ('Bcc', 'textarea', 'bccfield'),
        ('Subject', 'input', 'subjectfield'),
    ]
    for key, tag_name, element_id in wanted:
        node = page.find(tag_name, {'id': element_id})
        if node:
            try:
                record[key] = HTML.decode_entity(node.children[0])
            except IndexError:
                ## Skip fields whose content can not be indexed
                pass

    ## Find the message:
    msg_node = page.find('input', {'name': 'PlainMsg'})
    text = HTML.decode_entity(msg_node['value']) if msg_node else None
    if text:
        record['message'] = text

    if record:
        record['type'] = 'Edit Read'
        if self.username:
            record['From'] = self.username

        return self.insert_message(record, inode_template="y%s")
def process_readmessage(self, fd, parser):
    """ Extract a message being read and store it as a cached object.

    The subject, the From/To/Sent headers and the message id are
    scraped from the parsed page. If the page has a ReadMsgContainer
    div, its HTML is written (utf8 encoded) to a new cache object
    whose path is built from the service and the message id, and the
    scraped fields are inserted into the webmail_messages table.

    Nothing is stored when the div is missing or when the lookup of
    the path reports that it already exists.

    fd: the object being scanned; its case, urn and inode_id are used.
    parser: the parsed page; only parser.root is used.
    """
    root = parser.root
    result = {'type': 'Read', 'service': self.service}

    ## Find the subject
    sbj = root.find('div', {'class': 'ReadMsgSubject'})
    if sbj:
        result['subject'] = HTML.decode_entity(sbj.innerHTML())

    ## A cell starting with one of the labels below announces that
    ## the following cell holds the value of that field.
    context = None
    for td in root.search('td'):
        data = td.innerHTML()
        if context:
            result[context] = HTML.decode_entity(data)
            context = None

        label = data.lower()
        if label.startswith('from:'):
            context = 'From'
        elif label.startswith('to:'):
            context = 'To'
        elif label.startswith('sent:'):
            context = 'Sent'

    msg = root.find('div', {'class': 'ReadMsgContainer'})

    ## Try to detect the message ID
    tag = root.find('div', {'mid': '.'})
    if tag:
        result['message_id'] = tag['mid']
    else:
        ## NOTE(review): .replace() is called on this value below, so
        ## fd.inode_id is assumed to be a string - confirm.
        result['message_id'] = fd.inode_id

    ## Parsing the date is best effort: when the field is missing or
    ## can not be parsed, whatever was scraped is left untouched.
    try:
        result['Sent'] = Time.parse(result['Sent'])
    except Exception:
        pass

    if not msg:
        return

    message_urn = "/WebMail/%s/%s" % (
        self.service, result['message_id'].replace("/", "_"))

    ## Do not store a message whose path is already known. A
    ## RuntimeError from the lookup is treated as "not found".
    fsfd = FileSystem.DBFS(fd.case)
    try:
        if fsfd.lookup(path=message_urn):
            return
    except RuntimeError:
        pass

    message_fd = CacheManager.AFF4_MANAGER.create_cache_data(
        fd.case, message_urn, inherited=fd.urn)
    message_fd.write(msg.innerHTML().encode("utf8"))
    message_fd.insert_to_table("webmail_messages", result)
    message_fd.close()
def process_readmessage(self, fd, parser):
    """ Extract a message being read and store it as a cached object.

    The subject, the From/To/Sent headers and the message id are
    scraped from the parsed page. If the page has a ReadMsgContainer
    div, its HTML is written (utf8 encoded) to a new cache object
    whose path is built from the service and the message id, and the
    scraped fields are inserted into the webmail_messages table.

    Nothing is stored when the div is missing or when the lookup of
    the path reports that it already exists.

    fd: the object being scanned; its case, urn and inode_id are used.
    parser: the parsed page; only parser.root is used.
    """
    root = parser.root
    result = {'type': 'Read', 'service': self.service}

    ## Find the subject
    sbj = root.find('div', {'class': 'ReadMsgSubject'})
    if sbj:
        result['subject'] = HTML.decode_entity(sbj.innerHTML())

    ## A cell starting with one of the labels below announces that
    ## the following cell holds the value of that field.
    context = None
    for td in root.search('td'):
        data = td.innerHTML()
        if context:
            result[context] = HTML.decode_entity(data)
            context = None

        label = data.lower()
        if label.startswith('from:'):
            context = 'From'
        elif label.startswith('to:'):
            context = 'To'
        elif label.startswith('sent:'):
            context = 'Sent'

    msg = root.find('div', {'class': 'ReadMsgContainer'})

    ## Try to detect the message ID
    tag = root.find('div', {'mid': '.'})
    if tag:
        result['message_id'] = tag['mid']
    else:
        ## NOTE(review): .replace() is called on this value below, so
        ## fd.inode_id is assumed to be a string - confirm.
        result['message_id'] = fd.inode_id

    ## Parsing the date is best effort: when the field is missing or
    ## can not be parsed, whatever was scraped is left untouched.
    try:
        result['Sent'] = Time.parse(result['Sent'])
    except Exception:
        pass

    if not msg:
        return

    message_urn = "/WebMail/%s/%s" % (
        self.service, result['message_id'].replace("/", "_"))

    ## Do not store a message whose path is already known. A
    ## RuntimeError from the lookup is treated as "not found".
    fsfd = FileSystem.DBFS(fd.case)
    try:
        if fsfd.lookup(path=message_urn):
            return
    except RuntimeError:
        pass

    message_fd = CacheManager.AFF4_MANAGER.create_cache_data(
        fd.case, message_urn, inherited=fd.urn)
    message_fd.write(msg.innerHTML().encode("utf8"))
    message_fd.insert_to_table("webmail_messages", result)
    message_fd.close()
def process_readmessage(self):
    """ Store every message found in the parsed response.

    For each <message> element a record is built holding the message
    id, the received date (when it can be parsed), the subject, the
    From and To addresses and the body. The record is then handed to
    insert_message(). The body comes from the first part of type
    'text' that yields a non empty text; it stays '' otherwise.
    """
    ## We could get several messages in the same response:
    root = self.parser.root
    for message in root.search('message'):
        ## Start from a clean record for every message. A record
        ## shared between iterations would carry the body of an
        ## earlier message (and a date that failed to parse) over to
        ## the messages that follow it.
        result = {'type': 'Read', 'message': ''}
        result['message_id'] = message.find("mid").innerHTML()

        ## The date is optional - leave it out when it is unusable
        try:
            result['sent'] = Time.parse(
                message.find("receiveddate").innerHTML())
        except Exception:
            pass

        result['subject'] = message.find("subject").innerHTML()
        for tag, field in [('from', 'From'), ('to', 'To')]:
            result[field] = self.parse_email_address(message, tag)

        ## now iterate over all the parts:
        for part in message.search("part"):
            ## Usually text/html are the main body
            try:
                if not result['message'] and \
                        part.attributes['type'] == 'text':
                    text = part.find("text")
                    result['message'] = HTML.unquote(
                        HTML.decode_entity(text.innerHTML()))
            except KeyError:
                ## Parts without a type attribute are skipped
                pass

        self.insert_message(result, "webmail")
def process_readmessage(self):
    """ Store every message found in the parsed response.

    For each <message> element a record is built holding the message
    id, the received date (when it can be parsed), the subject, the
    From and To addresses and the body. The record is then handed to
    insert_message(). The body comes from the first part of type
    'text' that yields a non empty text; it stays '' otherwise.
    """
    ## We could get several messages in the same response:
    root = self.parser.root
    for message in root.search('message'):
        ## Start from a clean record for every message. A record
        ## shared between iterations would carry the body of an
        ## earlier message (and a date that failed to parse) over to
        ## the messages that follow it.
        result = {'type': 'Read', 'message': ''}
        result['message_id'] = message.find("mid").innerHTML()

        ## The date is optional - leave it out when it is unusable
        try:
            result['sent'] = Time.parse(
                message.find("receiveddate").innerHTML())
        except Exception:
            pass

        result['subject'] = message.find("subject").innerHTML()
        for tag, field in [('from', 'From'), ('to', 'To')]:
            result[field] = self.parse_email_address(message, tag)

        ## now iterate over all the parts:
        for part in message.search("part"):
            ## Usually text/html are the main body
            try:
                if not result['message'] and \
                        part.attributes['type'] == 'text':
                    text = part.find("text")
                    result['message'] = HTML.unquote(
                        HTML.decode_entity(text.innerHTML()))
            except KeyError:
                ## Parts without a type attribute are skipped
                pass

        self.insert_message(result, "webmail")
def parse_email_address(self, message, tag):
    """ Format the address held in a child element of a message.

    message: element that is searched for the address element.
    tag: name of the address element to look for (e.g. 'from').

    Returns a string of the form "name <email>". The name part is
    empty when the name can not be read. Returns None when message
    has no element called tag.
    """
    address = message.find(tag)
    if not address:
        return None

    ## The display name is optional. Only ordinary errors are
    ## swallowed, so that KeyboardInterrupt/SystemExit still propagate.
    try:
        name = address.find("name").innerHTML()
    except Exception:
        name = ''

    email = HTML.unquote(
        HTML.decode_entity(address.find("email").innerHTML()))

    return "%s <%s>" % (name, email)
def process_readmessage(self, fd):
    """ Extract the message shown on a read message page.

    Returns None without storing anything when the page has no
    ReadMsgContainer div. Otherwise the subject, the From/To/sent
    headers and the body are collected into a record which is passed
    to insert_message(); the result of that call is returned.

    fd: not used by this method.
    """
    result = {'type': 'Read', 'message': ''}
    root = self.parser.root
    tag = root.find('div', {'class': 'ReadMsgContainer'})
    if not tag:
        return

    ## Find the subject:
    sbj = tag.find('td', {'class': 'ReadMsgSubject'})
    if sbj:
        result['subject'] = HTML.decode_entity(sbj.innerHTML())

    ## Fill in all the other fields. A cell starting with one of the
    ## labels announces that the next cell holds the field's value.
    context = None
    for td in tag.search('td'):
        data = td.innerHTML()
        if context:
            result[context] = HTML.decode_entity(data)
            context = None

        label = data.lower()
        if label.startswith('from:'):
            context = 'From'
        elif label.startswith('to:'):
            context = 'To'
        elif label.startswith('sent:'):
            context = 'sent'

    ## Now the message:
    ## On newer sites its injected using script:
    body_re = re.compile(
        r"""document\.getElementById\("MsgContainer"\)\.innerHTML='([^']*)'""")
    for s in root.search('script'):
        m = body_re.match(s.innerHTML())
        if m:
            ## NOTE: "string_escape" is a Python 2 only codec
            result['message'] += HTML.decode_unicode(
                m.group(1).decode("string_escape"))
            break

    ## Parsing the date is best effort: when the field is missing or
    ## can not be parsed, whatever was scraped is left untouched.
    try:
        result['sent'] = Time.parse(result['sent'])
    except Exception:
        pass

    return self.insert_message(result)
def parse_email_address(self, message, tag):
    """ Format the address held in a child element of a message.

    message: element that is searched for the address element.
    tag: name of the address element to look for (e.g. 'from').

    Returns a string of the form "name <email>". The name part is
    empty when the name can not be read. Returns None when message
    has no element called tag.
    """
    address = message.find(tag)
    if not address:
        return None

    ## The display name is optional. Only ordinary errors are
    ## swallowed, so that KeyboardInterrupt/SystemExit still propagate.
    try:
        name = address.find("name").innerHTML()
    except Exception:
        name = ''

    email = HTML.unquote(
        HTML.decode_entity(address.find("email").innerHTML()))

    return "%s <%s>" % (name, email)
def process_readmessage(self, message):
    """ Parse the HTML of a read message page and store its fields.

    message: the page content; it is fed to a new HTML.HTMLParser.

    The subject, the From/To/Sent headers, the content of the
    ReadMsgContainer div and the message id are collected when they
    are present. Returns the result of insert_message().
    """
    parser = HTML.HTMLParser(verbose=0)
    parser.feed(message)
    parser.close()
    root = parser.root

    result = {'type': 'Read', 'Message': ''}

    ## Find the subject
    sbj = root.find('td', {'class': 'ReadMsgSubject'})
    if sbj:
        result['Subject'] = HTML.decode_entity(sbj.innerHTML())

    ## A cell starting with one of the labels below announces that
    ## the following cell holds the value of that field.
    context = None
    for td in root.search('td'):
        data = td.innerHTML()
        if context:
            result[context] = HTML.decode_entity(data)
            context = None

        label = data.lower()
        if label.startswith('from:'):
            context = 'From'
        elif label.startswith('to:'):
            context = 'To'
        elif label.startswith('sent:'):
            context = 'Sent'

    msg = root.find('div', {'class': 'ReadMsgContainer'})
    if msg:
        result['Message'] = msg.innerHTML()

    ## Try to detect the message ID
    tag = root.find('div', {'mid': '.'})
    if tag:
        result['message_id'] = tag['mid']

    ## Convert the date that was scraped into the 'Sent' field. This
    ## must name the field explicitly: once the loop above is over,
    ## context no longer refers to a field that has been filled in.
    ## Parsing is best effort - on failure the scraped text is kept.
    try:
        result['Sent'] = Time.parse(result['Sent'])
    except Exception:
        pass

    return self.insert_message(result, inode_template='l%s')
def process_readmessage(self, message):
    """ Parse the HTML of a read message page and store its fields.

    message: the page content; it is fed to a new HTML.HTMLParser.

    The subject, the From/To/Sent headers, the content of the
    ReadMsgContainer div and the message id are collected when they
    are present. Returns the result of insert_message().
    """
    parser = HTML.HTMLParser(verbose=0)
    parser.feed(message)
    parser.close()
    root = parser.root

    result = {'type': 'Read', 'Message': ''}

    ## Find the subject
    sbj = root.find('td', {'class': 'ReadMsgSubject'})
    if sbj:
        result['Subject'] = HTML.decode_entity(sbj.innerHTML())

    ## A cell starting with one of the labels below announces that
    ## the following cell holds the value of that field.
    context = None
    for td in root.search('td'):
        data = td.innerHTML()
        if context:
            result[context] = HTML.decode_entity(data)
            context = None

        label = data.lower()
        if label.startswith('from:'):
            context = 'From'
        elif label.startswith('to:'):
            context = 'To'
        elif label.startswith('sent:'):
            context = 'Sent'

    msg = root.find('div', {'class': 'ReadMsgContainer'})
    if msg:
        result['Message'] = msg.innerHTML()

    ## Try to detect the message ID
    tag = root.find('div', {'mid': '.'})
    if tag:
        result['message_id'] = tag['mid']

    ## Convert the date that was scraped into the 'Sent' field. This
    ## must name the field explicitly: once the loop above is over,
    ## context no longer refers to a field that has been filled in.
    ## Parsing is best effort - on failure the scraped text is kept.
    try:
        result['Sent'] = Time.parse(result['Sent'])
    except Exception:
        pass

    return self.insert_message(result, inode_template='l%s')
def process_editread(self, fd):
    """ Extract a message that is being edited from a compose page.

    Returns None without storing anything when the page has no
    ComposeHeader table. Otherwise the sender, the To/CC/BCC/subject
    inputs and the body are collected into a record which is passed,
    together with fd, to insert_message(); the result of that call
    is returned.

    fd: handed on to insert_message() unchanged.
    """
    ## Find the ComposeHeader table:
    result = {'type': 'Edit Read'}
    root = self.parser.root
    tag = root.find('table', {"class": 'ComposeHeader'})
    if not tag:
        return

    ## Find the From:
    row = tag.find('select', dict(name='ffrom'))
    if row:
        option = row.find('option', dict(selected='.*'))
        ## The select may have no selected option - skip From then
        if option:
            result['From'] = HTML.decode_entity(option['value'])

    for field, pattern in [('To', 'fto'), ('CC', 'fcc'),
                           ('BCC', 'fbcc'), ('subject', 'fsubject')]:
        tmp = tag.find('input', dict(name=pattern))
        if tmp:
            result[field] = HTML.decode_entity(tmp['value'])

    ## Now extract the content of the email:
    result['message'] = ''

    ## Sometimes the message is found in the EditArea div:
    div = root.find('div', dict(id='EditArea'))
    if div:
        result['message'] += div.innerHTML()

    ## On newer sites its injected using script:
    body_re = re.compile(
        r"""document\.getElementById\("fEditArea"\)\.innerHTML='([^']*)'""")
    for s in root.search('script'):
        m = body_re.match(s.innerHTML())
        if m:
            ## NOTE: "string_escape" is a Python 2 only codec
            result['message'] += m.group(1).decode("string_escape")
            break

    return self.insert_message(fd, result)
def process_readmessage(self, fd):
    """ Extract the message shown on a read message page.

    Returns None without storing anything when the page has no
    ReadMsgContainer div. Otherwise the subject, the From/To/sent
    headers and the body are collected into a record which is passed,
    together with fd, to insert_message(); the result of that call
    is returned.

    fd: handed on to insert_message() unchanged.
    """
    result = {'type': 'Read', 'message': ''}
    root = self.parser.root
    tag = root.find('div', {'class': 'ReadMsgContainer'})
    if not tag:
        return

    ## Find the subject:
    sbj = tag.find('td', {'class': 'ReadMsgSubject'})
    if sbj:
        result['subject'] = HTML.decode_entity(sbj.innerHTML())

    ## Fill in all the other fields. A cell starting with one of the
    ## labels announces that the next cell holds the field's value.
    context = None
    for td in tag.search('td'):
        data = td.innerHTML()
        if context:
            result[context] = HTML.decode_entity(data)
            context = None

        label = data.lower()
        if label.startswith('from:'):
            context = 'From'
        elif label.startswith('to:'):
            context = 'To'
        elif label.startswith('sent:'):
            context = 'sent'

    ## Now the message:
    ## On newer sites its injected using script:
    body_re = re.compile(
        r"""document\.getElementById\("MsgContainer"\)\.innerHTML='([^']*)'""")
    for s in root.search('script'):
        m = body_re.match(s.innerHTML())
        if m:
            ## NOTE: "string_escape" is a Python 2 only codec
            result['message'] += HTML.decode_unicode(
                m.group(1).decode("string_escape"))
            break

    ## Parsing the date is best effort: when the field is missing or
    ## can not be parsed, whatever was scraped is left untouched.
    try:
        result['sent'] = Time.parse(result['sent'])
    except Exception:
        pass

    return self.insert_message(fd, result)
def process_editread(self, fd):
    """ Extract a message that is being edited from a compose page.

    Returns None without storing anything when the page has no
    ComposeHeader table. Otherwise the sender, the To/CC/BCC/subject
    inputs and the body are collected into a record which is passed
    to insert_message(); the result of that call is returned.

    fd: not used by this method.
    """
    ## Find the ComposeHeader table:
    result = {'type': 'Edit Read'}
    root = self.parser.root
    tag = root.find('table', {"class": 'ComposeHeader'})
    if not tag:
        return

    ## Find the From:
    row = tag.find('select', dict(name='ffrom'))
    if row:
        option = row.find('option', dict(selected='.*'))
        ## The select may have no selected option - skip From then
        if option:
            result['From'] = HTML.decode_entity(option['value'])

    for field, pattern in [('To', 'fto'), ('CC', 'fcc'),
                           ('BCC', 'fbcc'), ('subject', 'fsubject')]:
        tmp = tag.find('input', dict(name=pattern))
        if tmp:
            result[field] = HTML.decode_entity(tmp['value'])

    ## Now extract the content of the email:
    result['message'] = ''

    ## Sometimes the message is found in the EditArea div:
    div = root.find('div', dict(id='EditArea'))
    if div:
        result['message'] += div.innerHTML()

    ## On newer sites its injected using script:
    body_re = re.compile(
        r"""document\.getElementById\("fEditArea"\)\.innerHTML='([^']*)'""")
    for s in root.search('script'):
        m = body_re.match(s.innerHTML())
        if m:
            ## NOTE: "string_escape" is a Python 2 only codec
            result['message'] += m.group(1).decode("string_escape")
            break

    return self.insert_message(result)
def process_message_yahoo1(self, result, header):
    """ Handle Yahoo mail from old version (prior to 20080224)

    result: dict describing the message; it is filled in place.
    header: element searched for the header cells and the MsgId
        input.

    The From/To/Sent/Subject fields are read from the cells of
    header, the body from the msgbody div of the parsed page and the
    message id from the MsgId input. When result ends up with more
    than 3 entries it is passed to insert_message() and the result of
    that call is returned; otherwise None is returned.
    """
    ## Look through all its rows. A cell starting with one of the
    ## labels below announces that the following cell holds the value.
    context = None
    for td in header.search("td"):
        if context:
            ## Use the first item of the cell that is a plain string
            for i in td:
                if type(i) == str:
                    result[context] = HTML.unquote(HTML.decode_entity(i))
                    break

            context = None

        label = td.innerHTML().lower().strip()
        if label.startswith('from:'):
            context = 'From'
        elif label.startswith('to:'):
            context = 'To'
        elif label.startswith('date:'):
            context = 'Sent'
        elif label.startswith('subject:'):
            context = 'Subject'

    ## Now the message:
    msgbody = self.parser.root.find('div', {"class": "msgbody"})
    if msgbody:
        result['message'] = msgbody.innerHTML()

    ## The date is stored under 'Sent' by the loop above, so that is
    ## the key that has to be read back here. Reading a differently
    ## cased key raises KeyError as soon as a date was found.
    if 'Sent' in result:
        result['Sent'] = Time.parse(result['Sent'],
                                    case=self.case,
                                    evidence_tz=None)

    ## Find the message id:
    tag = header.find('input', dict(name='MsgId'))
    if tag:
        result['message_id'] = tag['value']

    if len(result) > 3:
        return self.insert_message(result, inode_template="y%s")
def process_message_yahoo1(self, result, header):
    """ Handle Yahoo mail from old version (prior to 20080224)

    result: dict describing the message; it is filled in place.
    header: element searched for the header cells and the MsgId
        input.

    The From/To/Sent/Subject fields are read from the cells of
    header, the body from the msgbody div of the parsed page and the
    message id from the MsgId input. When result ends up with more
    than 3 entries it is passed to insert_message() and the result of
    that call is returned; otherwise None is returned.
    """
    ## Look through all its rows. A cell starting with one of the
    ## labels below announces that the following cell holds the value.
    context = None
    for td in header.search("td"):
        if context:
            ## Use the first item of the cell that is a plain string
            for i in td:
                if type(i) == str:
                    result[context] = HTML.unquote(HTML.decode_entity(i))
                    break

            context = None

        label = td.innerHTML().lower().strip()
        if label.startswith('from:'):
            context = 'From'
        elif label.startswith('to:'):
            context = 'To'
        elif label.startswith('date:'):
            context = 'Sent'
        elif label.startswith('subject:'):
            context = 'Subject'

    ## Now the message:
    msgbody = self.parser.root.find('div', {"class": "msgbody"})
    if msgbody:
        result['message'] = msgbody.innerHTML()

    ## The date is stored under 'Sent' by the loop above, so that is
    ## the key that has to be read back here. Reading a differently
    ## cased key raises KeyError as soon as a date was found.
    if 'Sent' in result:
        result['Sent'] = Time.parse(result['Sent'],
                                    case=self.case,
                                    evidence_tz=None)

    ## Find the message id:
    tag = header.find('input', dict(name='MsgId'))
    if tag:
        result['message_id'] = tag['value']

    if len(result) > 3:
        return self.insert_message(result, inode_template="y%s")
def process_readmessage(self, fd):
    """ Store each message of the parsed response as a cached object.

    For every <message> element a cache object is created under a
    path built from the service and the message id, the message
    fields are inserted into the webmail_messages table and the
    text and image parts are written to the object. Messages whose
    path is already known are skipped.

    fd: the object being scanned; its case and urn are used.
    """
    ## This is what the message tree looks like (XML):
    ## <GetDisplayMessageResponse>
    ##   <message>
    ##     <header>
    ##     <part>
    ##     <part>
    ##   <message>
    ##   <message>

    ## Each message is a separate message - therefore the same
    ## HTTP object might relay several messages.
    root = self.parser.root
    for message in root.search('message'):
        result = {'type': 'Read', 'service': self.service}
        result['message_id'] = message.find("mid").innerHTML()

        ## Messages are made unique using the message_id. This
        ## ensures that even if the same message was seen multiple
        ## times in the traffic, we only retain one copy of it.
        message_urn = "/Webmail/%s/%s" % (
            self.service, result['message_id'].replace("/", "_"))

        ## Make sure we dont have duplicates of the same message -
        ## duplicates may occur in other connections, so we check
        ## the webmail table for the same yahoo message id
        fsfd = FileSystem.DBFS(fd.case)
        try:
            if fsfd.lookup(path=message_urn):
                continue
        except RuntimeError:
            pass

        ## The date is optional - leave it out when it is unusable
        try:
            result['sent'] = Time.parse(
                message.find("receiveddate").innerHTML())
        except Exception:
            pass

        result['subject'] = HTML.unquote(
            HTML.decode_entity(message.find("subject").innerHTML()))

        for tag, field in [('from', 'From'), ('to', 'To')]:
            result[field] = self.parse_email_address(message, tag)

        message_fd = CacheManager.AFF4_MANAGER.create_cache_data(
            fd.case, message_urn, inherited=fd.urn)

        ## Make sure the object is closed even if a part is malformed
        try:
            message_fd.insert_to_table("webmail_messages", result)

            ## now iterate over all the parts:
            for part in message.search("part"):
                ## Parts are basically message attachments.
                ct = part.attributes['type']

                ## Usually text/html are the main body
                if "text" in ct:
                    text = part.find("text")
                    message_fd.write(
                        HTML.unquote(HTML.decode_entity(text.innerHTML())))
                elif "image" in ct:
                    message_fd.write(DB.expand(
                        "<b>%s</b><br><img src='%s'/>",
                        (self.make_link(part.attributes.get('filename', '')),
                         self.make_link(part.attributes['thumbnailurl']))))
        finally:
            message_fd.close()
def process_readmessage(self, fd):
    """ Store each message of the parsed response as a cached object.

    For every <message> element a cache object is created under a
    path built from the service and the message id, the message
    fields are inserted into the webmail_messages table and the
    text and image parts are written to the object. Messages whose
    path is already known are skipped.

    fd: the object being scanned; its case and urn are used.
    """
    ## This is what the message tree looks like (XML):
    ## <GetDisplayMessageResponse>
    ##   <message>
    ##     <header>
    ##     <part>
    ##     <part>
    ##   <message>
    ##   <message>

    ## Each message is a separate message - therefore the same
    ## HTTP object might relay several messages.
    root = self.parser.root
    for message in root.search('message'):
        result = {'type': 'Read', 'service': self.service}
        result['message_id'] = message.find("mid").innerHTML()

        ## Messages are made unique using the message_id. This
        ## ensures that even if the same message was seen multiple
        ## times in the traffic, we only retain one copy of it.
        message_urn = "/Webmail/%s/%s" % (
            self.service, result['message_id'].replace("/", "_"))

        ## Make sure we dont have duplicates of the same message -
        ## duplicates may occur in other connections, so we check
        ## the webmail table for the same yahoo message id
        fsfd = FileSystem.DBFS(fd.case)
        try:
            if fsfd.lookup(path=message_urn):
                continue
        except RuntimeError:
            pass

        ## The date is optional - leave it out when it is unusable
        try:
            result['sent'] = Time.parse(
                message.find("receiveddate").innerHTML())
        except Exception:
            pass

        result['subject'] = HTML.unquote(
            HTML.decode_entity(message.find("subject").innerHTML()))

        for tag, field in [('from', 'From'), ('to', 'To')]:
            result[field] = self.parse_email_address(message, tag)

        message_fd = CacheManager.AFF4_MANAGER.create_cache_data(
            fd.case, message_urn, inherited=fd.urn)

        ## Make sure the object is closed even if a part is malformed
        try:
            message_fd.insert_to_table("webmail_messages", result)

            ## now iterate over all the parts:
            for part in message.search("part"):
                ## Parts are basically message attachments.
                ct = part.attributes['type']

                ## Usually text/html are the main body
                if "text" in ct:
                    text = part.find("text")
                    message_fd.write(
                        HTML.unquote(HTML.decode_entity(text.innerHTML())))
                elif "image" in ct:
                    message_fd.write(DB.expand(
                        "<b>%s</b><br><img src='%s'/>",
                        (self.make_link(part.attributes.get('filename', '')),
                         self.make_link(part.attributes['thumbnailurl']))))
        finally:
            message_fd.close()