Пример #1
0
    def run_local(self, url):
        """Analyze the local file found at path `url`.

        The file is read from disk. When its extension is `.js` or `.jse`
        the script source is wrapped in a minimal HTML document; if the
        file already starts with a `<script` tag, it is parsed first and the
        text of its first script element is used. Any other file is passed
        through unchanged. The markup is then parsed with `w3c.parseString`,
        attached to a Window created for 'about:blank', opened and handed to
        `self.__run`.

        :param url: filesystem path of the sample to analyze
        """
        log.last_url = None
        log.last_url_fetched = None

        log.ThugLogging.set_url(url)
        log.ThugOpts.local = True

        log.HTTPSession = HTTPSession()

        # Context manager guarantees the handle is closed even if read() fails.
        with open(url, 'r') as fd:
            content = fd.read()

        # os.path.splitext always returns a (root, ext) pair.
        extension = os.path.splitext(url)[1].lower()
        encoding = cchardet.detect(content)

        if extension in ('.js', '.jse'):
            if not content.lstrip().startswith('<script'):
                # Detection may report no encoding (e.g. for an empty file);
                # fall back to UTF-8 rather than crashing in decode(None).
                charset = encoding['encoding'] or 'utf-8'
                html = tostring(
                    E.HTML(E.HEAD(), E.BODY(E.SCRIPT(content.decode(charset)))))
            else:
                soup = BeautifulSoup(content, "html.parser")

                # Strip the structural wrappers when present; a missing tag
                # resolves to None, whose unwrap() lookup raises
                # AttributeError, which is deliberately ignored.
                for tag_name in ('html', 'head', 'body'):
                    try:
                        getattr(soup, tag_name).unwrap()
                    except AttributeError:
                        pass

                html = tostring(
                    E.HTML(E.HEAD(), E.BODY(E.SCRIPT(soup.script.get_text()))))
        else:
            html = content

        if log.ThugOpts.features_logging:
            log.ThugLogging.Features.add_characters_count(len(html))
            log.ThugLogging.Features.add_whitespaces_count(
                sum(1 for a in html if a.isspace()))

        doc = w3c.parseString(html)
        window = Window('about:blank', doc, personality=log.ThugOpts.useragent)
        window.open()
        self.__run(window)
Пример #2
0
    def run_local(self, url):
        """Analyze the local file found at path `url`.

        The file is read as UTF-8 text. When its extension is `.js` or
        `.jse` the script source is wrapped in a minimal HTML document; if
        the file already starts with a `<script` tag, it is parsed first and
        the text of its first script element is used. Any other file is
        passed through unchanged. The markup is then parsed with
        `w3c.parseString`, attached to a Window created for 'about:blank',
        opened and handed to `self.__run`.

        :param url: filesystem path of the sample to analyze
        """
        log.last_url = None
        log.last_url_fetched = None

        log.ThugLogging.set_url(url)
        log.ThugOpts.local = True

        log.HTTPSession = HTTPSession()

        # Context manager guarantees the handle is closed even if read() fails.
        with open(url, 'r', encoding="utf-8") as fd:
            content = fd.read()

        # os.path.splitext always returns a (root, ext) pair.
        extension = os.path.splitext(url)[1].lower()

        if extension in ('.js', '.jse'):
            if not content.lstrip().startswith('<script'):
                html = tostring(E.HTML(E.HEAD(), E.BODY(E.SCRIPT(content))))
            else:
                soup = bs4.BeautifulSoup(content, "html.parser")

                # Strip the structural wrappers when present; a missing tag
                # resolves to None, whose unwrap() lookup raises
                # AttributeError, which is deliberately ignored.
                for tag_name in ('html', 'head', 'body'):
                    try:
                        getattr(soup, tag_name).unwrap()
                    except AttributeError:
                        pass

                code = soup.script.get_text(types=(NavigableString, CData,
                                                   Script))
                html = tostring(E.HTML(E.HEAD(), E.BODY(E.SCRIPT(code))))
        else:
            html = content

        if log.ThugOpts.features_logging:
            log.ThugLogging.Features.add_characters_count(len(html))

            # Only string items are tested for whitespace.
            # NOTE(review): if `html` is a bytes object, iteration yields
            # ints and nothing is counted - confirm this is intended.
            whitespaces_count = sum(
                1 for a in html
                if isinstance(a, six.string_types) and a.isspace())
            log.ThugLogging.Features.add_whitespaces_count(whitespaces_count)

        doc = w3c.parseString(html)
        window = Window('about:blank', doc, personality=log.ThugOpts.useragent)
        window.open()
        self.__run(window)
Пример #3
0
    def run_remote(self, url):
        """Analyze the remote resource at `url`.

        A `url` whose scheme is missing or does not start with 'http' gets
        an 'http://' prefix. A fresh HTTP session is installed on `log`, the
        URL is opened from a Window constructed with the configured referer
        and user agent, and the resulting window (if any) is handed to
        `self.__run`.

        If `url` cannot be parsed, a warning is logged and the method
        returns without performing the analysis.

        :param url: URL (with or without scheme) to analyze
        """
        log.last_url = None
        log.last_url_fetched = None

        log.ThugOpts.local = False

        try:
            scheme = urlparse.urlparse(url).scheme
        except ValueError as e:
            # str(e) instead of e.message: the `message` attribute is
            # deprecated on Python 2 and absent on Python 3 exceptions.
            log.warning("[WARNING] Analysis not performed (%s)", str(e))
            return

        if not scheme or not scheme.startswith('http'):
            url = 'http://%s' % (url, )

        log.ThugLogging.set_url(url)

        log.HTTPSession = HTTPSession()

        doc = w3c.parseString('')
        window = Window(log.ThugOpts.referer,
                        doc,
                        personality=log.ThugOpts.useragent)

        # open() may return a falsy value, in which case nothing is run.
        window = window.open(url)
        if window:
            self.__run(window)
Пример #4
0
    def run_local(self, url):
        """Analyze the local file found at path `url`.

        The file is read from disk. When its extension is `.js` or `.jse`
        the script source is wrapped in a minimal HTML document; if the
        file already starts with a `<script` tag, it is parsed first and the
        text of its first script element is used. Any other file is passed
        through unchanged. The markup is then parsed with `w3c.parseString`,
        attached to a Window created for 'about:blank', opened and handed to
        `self.__run`.

        :param url: filesystem path of the sample to analyze
        """
        log.last_url = None
        log.last_url_fetched = None

        log.ThugLogging.set_url(url)
        log.ThugOpts.local = True

        log.HTTPSession = HTTPSession()

        # Context manager guarantees the handle is closed even if read() fails.
        with open(url, 'r') as fd:
            content = fd.read()

        # os.path.splitext always returns a (root, ext) pair.
        extension = os.path.splitext(url)[1].lower()
        encoding  = cchardet.detect(content)

        if extension in ('.js', '.jse', ):
            if not content.lstrip().startswith('<script'):
                # Detection may report no encoding (e.g. for an empty file);
                # fall back to UTF-8 rather than crashing in decode(None).
                charset = encoding['encoding'] or 'utf-8'
                html = tostring(E.HTML(E.HEAD(), E.BODY(E.SCRIPT(content.decode(charset)))))
            else:
                soup = BeautifulSoup(content, "html.parser")

                # Strip the structural wrappers when present; a missing tag
                # resolves to None, whose unwrap() lookup raises
                # AttributeError, which is deliberately ignored.
                for tag_name in ('html', 'head', 'body'):
                    try:
                        getattr(soup, tag_name).unwrap()
                    except AttributeError:
                        pass

                html = tostring(E.HTML(E.HEAD(), E.BODY(E.SCRIPT(soup.script.get_text()))))
        else:
            html = content

        if log.ThugOpts.features_logging:
            log.ThugLogging.Features.add_characters_count(len(html))
            log.ThugLogging.Features.add_whitespaces_count(sum(1 for a in html if a.isspace()))

        doc    = w3c.parseString(html)
        window = Window('about:blank', doc, personality = log.ThugOpts.useragent)
        window.open()
        self.__run(window)
Пример #5
0
    def run_local(self, url):
        """Analyze the local file found at path `url`.

        The file is read from disk. When its extension is exactly `.js`
        (case-insensitive) the content is wrapped in a minimal HTML document
        as a script element; any other file is passed through unchanged.
        The markup is then parsed with `w3c.parseString`, attached to a
        Window created for 'about:blank', opened and handed to `self.run`.

        :param url: filesystem path of the sample to analyze
        """
        log.ThugLogging.set_url(url)
        log.ThugOpts.local = True

        log.HTTPSession = HTTPSession()

        # Context manager guarantees the handle is closed even if read() fails.
        with open(url, 'r') as fd:
            content = fd.read()

        # os.path.splitext always returns a (root, ext) pair.
        extension = os.path.splitext(url)[1].lower()

        # Compare for equality: the former `in ('.js')` tested against a
        # plain string, i.e. a substring match that also accepted files with
        # no extension at all ('' is a substring of every string).
        if extension == '.js':
            html = tostring(E.HTML(E.BODY(E.SCRIPT(content))))
        else:
            html = content

        doc = w3c.parseString(html)
        window = Window('about:blank', doc, personality=log.ThugOpts.useragent)
        window.open()
        self.run(window)
Пример #6
0
    def run_local(self, url):
        """Analyze the local file found at path `url`.

        The file is read from disk. When its extension is `.js` or `.jse`
        (case-insensitive) the content is wrapped in a minimal HTML document
        as a script element; any other file is passed through unchanged.
        The markup is then parsed with `w3c.parseString`, attached to a
        Window created for 'about:blank', opened and handed to `self.__run`.

        :param url: filesystem path of the sample to analyze
        """
        log.ThugLogging.set_url(url)
        log.ThugOpts.local = True

        log.HTTPSession = HTTPSession()

        # Context manager guarantees the handle is closed even if read() fails.
        with open(url, 'r') as fd:
            content = fd.read()

        # os.path.splitext always returns a (root, ext) pair.
        extension = os.path.splitext(url)[1].lower()

        if extension in ('.js', '.jse', ):
            html = tostring(E.HTML(E.BODY(E.SCRIPT(content))))
        else:
            html = content

        doc    = w3c.parseString(html)
        window = Window('about:blank', doc, personality = log.ThugOpts.useragent)
        window.open()
        self.__run(window)
Пример #7
0
    def run_local(self, url):
        """Analyze the local file found at path `url`.

        The file is read from disk. When its extension is `.js` or `.jse`
        the script source is wrapped in a minimal HTML document; if the
        file already starts with a `<script` tag, it is parsed first and the
        text of its first script element is used. Any other file is passed
        through unchanged. The markup is then parsed with `w3c.parseString`,
        attached to a Window created for 'about:blank', opened and handed to
        `self.__run`.

        :param url: filesystem path of the sample to analyze
        """
        log.ThugLogging.set_url(url)
        log.ThugOpts.local = True

        log.HTTPSession = HTTPSession()

        # Context manager guarantees the handle is closed even if read() fails.
        with open(url, 'r') as fd:
            content = fd.read()

        # os.path.splitext always returns a (root, ext) pair.
        extension = os.path.splitext(url)[1].lower()

        if extension in ('.js', '.jse'):
            if not content.lstrip().startswith('<script'):
                html = tostring(E.HTML(E.HEAD(), E.BODY(E.SCRIPT(content))))
            else:
                soup = BeautifulSoup(content, "html.parser")

                # Strip the structural wrappers when present; a missing tag
                # resolves to None, whose unwrap() lookup raises
                # AttributeError, which is deliberately ignored.
                for tag_name in ('html', 'head', 'body'):
                    try:
                        getattr(soup, tag_name).unwrap()
                    except AttributeError:
                        pass

                html = tostring(
                    E.HTML(E.HEAD(), E.BODY(E.SCRIPT(soup.script.get_text()))))
        else:
            html = content

        doc = w3c.parseString(html)
        window = Window('about:blank', doc, personality=log.ThugOpts.useragent)
        window.open()
        self.__run(window)
Пример #8
0
    def run_local(self, url):
        """Analyze the local file found at path `url`.

        The file is read from disk. When its extension is `.js` or `.jse`
        the script source is wrapped in a minimal HTML document; if the
        file already starts with a `<script` tag, it is parsed first and the
        text of its first script element is used. Any other file is passed
        through unchanged. The markup is then parsed with `w3c.parseString`,
        attached to a Window created for 'about:blank', opened and handed to
        `self.__run`.

        :param url: filesystem path of the sample to analyze
        """
        log.ThugLogging.set_url(url)
        log.ThugOpts.local = True

        log.HTTPSession = HTTPSession()

        # Context manager guarantees the handle is closed even if read() fails.
        with open(url, 'r') as fd:
            content = fd.read()

        # os.path.splitext always returns a (root, ext) pair.
        extension = os.path.splitext(url)[1].lower()

        if extension in ('.js', '.jse', ):
            if not content.lstrip().startswith('<script'):
                html = tostring(E.HTML(E.BODY(E.SCRIPT(content))))
            else:
                soup = BeautifulSoup(content, "html.parser")

                # Strip the structural wrappers when present; a missing tag
                # resolves to None, whose unwrap() lookup raises
                # AttributeError, which is deliberately ignored.
                for tag_name in ('html', 'head', 'body'):
                    try:
                        getattr(soup, tag_name).unwrap()
                    except AttributeError:
                        pass

                html = tostring(E.HTML(E.BODY(E.SCRIPT(soup.script.get_text()))))
        else:
            html = content

        doc    = w3c.parseString(html)
        window = Window('about:blank', doc, personality = log.ThugOpts.useragent)
        window.open()
        self.__run(window)
Пример #9
0
    def run_remote(self, url):
        """Analyze the remote resource at `url`.

        A `url` whose scheme is missing or does not start with 'http' gets
        an 'http://' prefix. A fresh HTTP session is installed on `log`, the
        URL is opened from a Window constructed with the configured referer
        and user agent, and the resulting window (if any) is handed to
        `self.__run`.

        :param url: URL (with or without scheme) to analyze
        """
        parsed_scheme = urlparse.urlparse(url).scheme

        # Anything that is not an http-like scheme is treated as a bare host.
        if not (parsed_scheme and parsed_scheme.startswith('http')):
            url = 'http://%s' % (url, )

        log.ThugLogging.set_url(url)
        log.HTTPSession = HTTPSession()

        blank_document = w3c.parseString('')
        referer_window = Window(log.ThugOpts.referer,
                                blank_document,
                                personality = log.ThugOpts.useragent)

        # open() may return a falsy value, in which case nothing is run.
        opened = referer_window.open(url)
        if not opened:
            return

        self.__run(opened)
Пример #10
0
    def run_remote(self, url):
        """Analyze the remote resource at `url`.

        When the scheme of `url` is empty or does not begin with 'http',
        'http://' is prepended. The (possibly rewritten) URL is recorded via
        `log.ThugLogging.set_url`, a new HTTPSession is stored on `log`, and
        the URL is opened from a Window built on an empty document with the
        configured referer and user agent. A truthy result of that open call
        is passed to `self.__run`.

        :param url: URL (with or without scheme) to analyze
        """
        scheme = urlparse.urlparse(url).scheme
        has_http_scheme = bool(scheme) and scheme.startswith('http')

        if not has_http_scheme:
            url = 'http://%s' % (url, )

        log.ThugLogging.set_url(url)
        log.HTTPSession = HTTPSession()

        empty_doc = w3c.parseString('')
        target = Window(
            log.ThugOpts.referer,
            empty_doc,
            personality = log.ThugOpts.useragent,
        ).open(url)

        # Nothing to run when the open call did not produce a window.
        if target:
            self.__run(target)
Пример #11
0
    def run_remote(self, url):
        """Analyze the remote resource at `url`.

        A `url` whose scheme is missing or does not start with 'http' gets
        an 'http://' prefix. A fresh HTTP session is installed on `log`, the
        URL is opened from a Window constructed with the configured referer
        and user agent, and the resulting window (if any) is handed to
        `self.__run`.

        If `url` cannot be parsed, a warning is logged and the method
        returns without performing the analysis.

        :param url: URL (with or without scheme) to analyze
        """
        log.last_url = None
        log.last_url_fetched = None

        log.ThugOpts.local = False

        try:
            scheme = urlparse.urlparse(url).scheme
        except ValueError as e:
            # str(e) instead of e.message: the `message` attribute is
            # deprecated on Python 2 and absent on Python 3 exceptions.
            log.warning("[WARNING] Analysis not performed (%s)", str(e))
            return

        if not scheme or not scheme.startswith('http'):
            url = 'http://%s' % (url, )

        log.ThugLogging.set_url(url)

        log.HTTPSession = HTTPSession()

        doc    = w3c.parseString('')
        window = Window(log.ThugOpts.referer, doc, personality = log.ThugOpts.useragent)

        # open() may return a falsy value, in which case nothing is run.
        window = window.open(url)
        if window:
            self.__run(window)