Exemplo n.º 1
0
    def stats(self, query, result):
        result.start_table(**{'class': 'GeneralTable'})
        dbh = DB.DBO(self.case)
        columns = [
            "service", "type", "From", "To", "CC", "BCC", "sent", "subject",
            "message"
        ]
        dbh.execute("select * from webmail_messages where `inode_id`=%r",
                    self.lookup_id())
        row = dbh.fetch()

        dbh2 = DB.DBO(self.case)
        dbh2.execute("select * from inode where inode_id = %r",
                     row['inode_id'])
        row2 = dbh2.fetch()
        result.row("Timestamp", row2['mtime'])

        for c in columns:
            if c == 'message':
                ## Filter the message out here:
                parser = HTML.HTMLParser(tag_class = \
                                         FlagFramework.Curry(HTML.ResolvingHTMLTag,
                                                             case = self.case,
                                                             inode_id = row['parent_inode_id']))
                #parser = HTML.HTMLParser(tag_class = HTML.TextTag)
                parser.feed(HTML.decode(row[c] or ""))
                parser.close()
                #tmp = result.__class__(result)
                #tmp.text(parser.root.innerHTML(), font='typewriter', wrap='full')
                #row[c] = tmp
                r = parser.root.__str__()
                r = textwrap.fill(r)
                row[c] = r
Exemplo n.º 2
0
    def sanitize_page(self, tag_class):
        """ This produces a rendered version of the underlying page """
        ## Get the original HTML File:
        fsfd = FileSystem.DBFS(self.case)
        fd = fsfd.open(inode_id=self.parent_inode_id)
        #data = HTML.decode(fd.read())
        data = fd.read()
        ## FIXME - This is a hack which works because we always send a
        ## curried class down:
        try:
            tag_class.kwargs['inode_id'] = self.parent_inode_id
        except AttributeError:
            pass

        ## Make a parser:
        p = HTML.HTMLParser(tag_class=tag_class)
        p.feed(data)
        p.close()

        ## Allow us to fix the html page
        root = p.root
        self.fixup_page(root, tag_class)

        ## Add the timestamp to the title of the page - so when you
        ## print it out we can identify it:
        s = fsfd.istat(inode_id=self.parent_inode_id)
        title_tag = root.find("title")
        if title_tag:
            title_tag.children = [
                "%s %s %s" % (title_tag.innerHTML(), s['mtime'], s['inode']),
            ]

        return root.innerHTML()
Exemplo n.º 3
0
    def scan(self, fd, scanners, type, mime, cookie, scores=None, **args):
        if scores.get('GmailStreamMagic', 0) == 0:
            return

        pyflaglog.log(pyflaglog.DEBUG,
                      "Opening %s for Gmail processing" % fd.inode_id)
        self.current_time = None
        self.current_box = 'Unknown'

        if "html" in mime:
            html_parser = HTML.HTMLParser()
            html_parser.parse_fd(fd)
            html_parser.close()

            ## Process all script segments
            for script_tag in html_parser.root.search("script"):
                script = script_tag.innerHTML()
                try:
                    j = Javascript.JSParser()
                    j.feed(script)
                    j.close()
                except:
                    continue

                self.process_js(j.root, fd)

        elif "javascript" in mime:
            ## Make a new parser
            j = Javascript.JSParser()
            j.parse_fd(fd)
            j.close()

            self.process_js(j.root, fd)
Exemplo n.º 4
0
    def process_string(self, fd, string):
        parser = HTML.HTMLParser(verbose=0)
        parser.feed(string)
        parser.close()

        self.process_readmessage(fd, parser)
        self.process_listing(fd, parser)
Exemplo n.º 5
0
    def display(self, value, row, result):
        parser = HTML.HTMLParser(tag_class=HTML.TextTag)
        parser.feed(value or '')
        parser.close()

        value = parser.root.innerHTML()

        result.text(value, wrap='full', font='typewriter')
Exemplo n.º 6
0
        def generator():
            parser = HTML.HTMLParser(tag_class=Curry(HTML.ResolvingHTMLTag,
                                                     inode_id=fd.lookup_id(),
                                                     case=self.case))
            #parser = HTML.HTMLParser(tag_class = HTML.Tag)
            data = fd.read(1000000)
            parser.feed(data)
            parser.close()

            yield parser.root.innerHTML()
Exemplo n.º 7
0
        def boring(self, metadata, data=''):
            ## We dont think its boring if our base class does not:
            ## And the data contains '<title>\s+Windows Live' in the top.
            if not Scanner.StoreAndScanType.boring(self, metadata, data) and \
                   re.search("<title>\s+Windows Live", data):
                ## Make a new parser:
                if not self.parser:
                    self.parser = HTML.HTMLParser(verbose=0)
                return False

            return True
Exemplo n.º 8
0
    def render_html(self, inode_id, table_renderer):
        import plugins.TableRenderers.HTMLBundle as HTMLBundle

        fsfd = FileSystem.DBFS(table_renderer.case)
        fd = fsfd.open(inode_id=inode_id)
        parser = HTML.HTMLParser(tag_class=HTML.SanitizingTag)

        parser.feed(fd.read(fd.size))
        parser.close()

        text = parser.root.innerHTML()
        return text
Exemplo n.º 9
0
        def boring(self, metadata, data=''):
            ## We dont think its boring if our base class does not:
            ## And the data contains '<title>\s+Yahoo! Mail' in the top.
            if not Scanner.StoreAndScanType.boring(self, metadata, data=''):
                m = re.search("<title>[^<]+Yahoo! Mail", data)
                if m:
                    self.username = None
                    ## Make a new parser:
                    if not self.parser:
                        self.parser = HTML.HTMLParser(verbose=0)
                    return False

            return True
Exemplo n.º 10
0
    def render_html(self, value, table_renderer):
        import plugins.TableRenderers.HTMLBundle as HTMLBundle

        parser = HTML.HTMLParser(tag_class=HTML.TextTag)

        parser.feed(value or '')
        parser.close()

        text = parser.root.innerHTML()

        ## Make sure its wrapped:
        ui = HTMLUI.HTMLUI(initial=True)
        ui.text(text, wrap='full', font='typewriter')
        return ui.__str__()
Exemplo n.º 11
0
 def fixup_page(self, root, tag_class):
     ## We have to inject the message into the edit area:
     edit_area = root.find("div", {"class":"EditArea"}) or \
                 root.find("div",{"id":"MsgContainer"}) or \
                 root.find("textarea",{"id":"fMessageBody"})
     if edit_area:
         parser = HTML.HTMLParser(tag_class=tag_class)
         parser.feed(HTML.decode(self.message))
         #parser.feed(self.message)
         parser.close()
         result = parser.root.__str__()
         result = textwrap.fill(result)
         edit_area.prune()
         edit_area.add_child(result)
         edit_area.name = 'div'
Exemplo n.º 12
0
        def boring(self, metadata, data=''):
            ## Yahoo web 2.0 is very nice to work with- All
            ## responses are in nice XML
            if not Scanner.StoreAndScanType.boring(self, metadata, data=''):
                m = re.search(
                    "<(GetDisplayMessageResponse|ListMessagesResponse|SendMessageResponse)",
                    data)
                if m:
                    self.context = m.group(1)
                    ## Make a new parser:
                    if not self.parser:
                        self.parser = HTML.HTMLParser(verbose=0)
                    return False

            return True
Exemplo n.º 13
0
def insert_message(self, result, inode_template="l%s"):
    ## We dont really want to touch the db in here - just print it out
    ## nicely:
    try:
        ## Try to render the html as text:
        message = unicode(result['Message'])
        p = HTML.HTMLParser(tag_class=HTML.TextTag)
        p.feed(message)
        p.close()

        result['Message'] = p.root.__str__()

    except KeyError:
        pass

    for k, v in result.items():
        print "   %s: %r" % (k, v)

    return True
Exemplo n.º 14
0
    def scan(self, fd, scanners, type, mime, cookie, **args):
        if "Yahoo Mail AJAX" in type:
            self.parser = HTML.HTMLParser(verbose=0)
            pyflaglog.log(
                pyflaglog.DEBUG,
                "Opening %s for YahooMail2.0 processing" % fd.inode_id)

            ## Read all the data into the parser
            self.context = None
            while 1:
                data = fd.read(1024 * 1024)
                if not data: break

                if not self.context: self.context = data
                self.parser.feed(data)

            self.parser.close()

            if 'GetDisplayMessageResponse' in self.context:
                self.process_readmessage(fd)
Exemplo n.º 15
0
    def fixup_page(self, result, message, tag_class):
        """ Given the parse tree in root, fix up the page so it looks
        as close as possible to the way it should. We write the new
        page on outfd.
        """
        if not message: return
        ## We have to inject the message into the edit area:
        edit_area = self.parser.root.find("div", {"class":"EditArea"}) or \
                    self.parser.root.find("div",{"id":"MsgContainer"}) or \
                    self.parser.root.find("textarea",{"id":"fMessageBody"})
        if edit_area:
            parser = HTML.HTMLParser(tag_class=tag_class)
            parser.feed(HTML.decode(message))
            parser.close()
            result = parser.root.__str__()
            result = textwrap.fill(result)
            edit_area.prune()
            edit_area.add_child(result)
            edit_area.name = 'div'

        return self.parser.root.innerHTML()
Exemplo n.º 16
0
        def process_readmessage(self, message):
            parser = HTML.HTMLParser(verbose=0)
            parser.feed(message)
            parser.close()

            result = {'type': 'Read', 'Message': ''}

            ## Find the subject
            sbj = parser.root.find('td', {'class': 'ReadMsgSubject'})
            if sbj: result['Subject'] = HTML.decode_entity(sbj.innerHTML())

            context = None
            for td in parser.root.search('td'):
                data = td.innerHTML()
                if context:
                    result[context] = HTML.decode_entity(data)
                    context = None

                if data.lower().startswith('from:'):
                    context = 'From'
                elif data.lower().startswith('to:'):
                    context = 'To'
                elif data.lower().startswith('sent:'):
                    context = 'Sent'

            msg = parser.root.find('div', {'class': 'ReadMsgContainer'})
            if msg:
                result['Message'] = msg.innerHTML()

            ## Try to detect the message ID
            tag = parser.root.find('div', {'mid': '.'})
            if tag:
                result['message_id'] = tag['mid']

            try:
                result[context] = Time.parse(result[context])
            except:
                pass

            return self.insert_message(result, inode_template='l%s')
Exemplo n.º 17
0
    def process_send_message(self, fd):
        dbh = DB.DBO(self.case)
        dbh.execute(
            "select `key`,`value`,`indirect` from http_parameters where `key`='body' and inode_id = %r limit 1",
            self.fd.inode_id)
        row = dbh.fetch()
        if not row: return

        inode_id = row['indirect']
        if not inode_id: return

        ## Need to parse the sent message
        fsfd = FileSystem.DBFS(self.case)
        fd = fsfd.open(inode_id=inode_id)
        self.parser = HTML.HTMLParser(verbose=0)
        self.parser.feed(fd.read())
        self.parser.close()
        root = self.parser.root

        result = {'type': 'Edit Sent'}
        result['From'] = self.parse_email_address(root, 'from')
        result['To'] = self.parse_email_address(root, 'to')
        try:
            result['message'] = root.find("text").innerHTML()
        except:
            pass

        ## Sometimes they also give us the html version
        #try:
        #    result['message'] = root.find("html").innerHTML()
        #except: pass

        try:
            result['subject'] = root.find("subject").innerHTML()
        except:
            pass

        self.insert_message(result, "webmail")
Exemplo n.º 18
0
    def scan(self, fd, scanners, type, mime, cookie, **args):
        if "HTML" in type:
            data = fd.read(1024)
            if not re.search("<title>\s+Windows Live", data): return

            ## Ok - we know its a Live page
            pyflaglog.log(
                pyflaglog.DEBUG, "Opening (%s) %s for Hotmail processing" %
                (fd.inode_id, fd.urn))
            self.parser = HTML.HTMLParser(verbose=0)
            self.parser.feed(data.decode("utf8", "ignore"))

            while len(data) > 0:
                data = fd.read(1024)
                self.parser.feed(data.decode("utf8", "ignore"))
                ## Get all the tokens
                while self.parser.next_token(True):
                    pass

            ## Now we should be able to parse the data out:
            self.process_send_message(fd)
            self.process_editread(fd)
            self.process_readmessage(fd)
            self.process_mail_listing(fd)
Exemplo n.º 19
0
 def sanitize_data(self, data, value, result):
     parser = HTML.HTMLParser(tag_class = \
                              FlagFramework.Curry(MessageTags,
                                                  case = self.case,
                                                  inode_id = value))
Exemplo n.º 20
0
    def xxxdisplay(self, value, row, result):
        parser = HTML.HTMLParser(tag_class=HTML.SanitizingTag)
        parser.feed(value)
        parser.close()

        return parser.root.innerHTML()
Exemplo n.º 21
0
    def scan(self, fd, scanners, type, mime, cookie, **args):
        if "Google Image Search" in type:
            pyflaglog.log(pyflaglog.DEBUG,"Opening %s for Google image search processing" % fd.inode_id)
            ## Parse the file
            self.parser = HTML.HTMLParser()        
            self.parser.feed(fd.read())
            self.parser.close()
            
            ## Pull out all the scripts and match the regex:
            result = ''
            image_text = ''
            text_text = ''
            count = 0
            total_count = 0
            regex = re.compile('dyn.Img(\(.+?\));')
            for script in self.parser.root.search("script"):
                data = script.innerHTML()
                for m in regex.finditer(data):
                    row = eval(m.group(1),{},{})
                    image_text += '''\n<td id="tDataImage%s" nowrap="" width="16%%" valign="bottom" align="center" style="padding-top: 0px;">
                    <a href="%s">
                    <img height="%s" width="%s" src="%s?q=tbn:%s%s" style="border: 1px solid ;"/>
                    </a>
                    </td>\n''' % (total_count, row[0], row[5], row[4], row[14], row[2], row[3])

                    text_text += '''<td id="tDataText%s" width="16%%" valign="top" align="center">
                    <font face="arial,sans-serif" size="-1">
                    %s
                    <br/>
                    %s - %s
                    <br/>
                    <font color="#008000">%s</font>
                    </font>
                    </td>''' % (total_count, row[6], row[9], row[10], row[11])
                    
                    count += 1
                    total_count += 1
                    
                    if count >= 5:
                        result += "<tr>%s</tr>\n<tr>%s</tr>\n" % (image_text, text_text)
                        image_text = ''
                        text_text = ''
                        count = 0

            if image_text:
                result += "<tr>%s</tr>\n<tr>%s</tr>\n" % (image_text, text_text)

            if result:
                ## Prepare the new page
                tag = self.parser.root.find("div", {"id":"ImgContent"})
                if tag:
                    result = "<table>%s</table>" % result
                    tag.add_child(result)

                page = self.parser.root.innerHTML()
                page = page.encode("utf8","ignore")

                new_fd = CacheManager.AFF4_MANAGER.create_cache_data(
                    fd.case,
                    "%s/Gimage" % fd.urn,
                    page, inherited = fd.urn)

                new_fd.close()