def recv_info(self, info):
    """
    Analyze a URL resource for suspicious traits.

    Collects SuspiciousURLPath results by (1) matching wordlist keywords
    against the URL path components, (2) matching a wordlist of suspicious
    extensions, and (3) flagging hostnames/subdomains with high Shannon
    entropy.  Finally downloads the page to look for outgoing links to
    malware sites.

    :param info: URL resource to analyze (project type; assumed to expose
        ``parsed_url``, ``url`` and ``depth`` — confirm against caller).
    """
    m_parsed_url = info.parsed_url
    m_results = []

    #------------------------------------------------------------------
    # Find suspicious URLs by matching against known substrings.

    # Load wordlists.
    m_wordlist_middle     = WordListLoader.get_wordlist(Config.plugin_config['middle'])
    m_wordlist_extensions = WordListLoader.get_wordlist(Config.plugin_config['extensions'])

    # Add keywords matching any position of the URL path
    # (directory component, file base name or extension).
    m_results.extend(
        SuspiciousURLPath(info, x)
        for x in m_wordlist_middle
        if x in m_parsed_url.directory.split("/")
        or x == m_parsed_url.filebase
        or x == m_parsed_url.extension
    )

    # Add keywords matching the URL extension exactly.
    m_results.extend(
        SuspiciousURLPath(info, x)
        for x in m_wordlist_extensions
        if m_parsed_url.extension == x
    )

    #------------------------------------------------------------------
    # Find suspicious URLs by calculating the Shannon entropy of the hostname.
    # Idea from: https://github.com/stricaud/urlweirdos/blob/master/src/urlw/plugins/shannon/__init__.py
    # TODO: test with unicode enabled hostnames!

    # Check the Shannon entropy for the whole hostname.
    hostname = info.parsed_url.hostname
    entropy = calculate_shannon_entropy(hostname)
    if entropy > 4.0:
        m_results.append(SuspiciousURLPath(info, hostname))

    # Check the Shannon entropy for each subdomain (short labels skipped:
    # entropy of very short strings is not meaningful).
    for subdomain in info.parsed_url.hostname.split('.'):
        if len(subdomain) > 3:
            entropy = calculate_shannon_entropy(subdomain)
            if entropy > 4.0:
                m_results.append(SuspiciousURLPath(info, subdomain))

    #------------------------------------------------------------------
    #
    # Get malware suspicious links
    #
    #------------------------------------------------------------------
    p = None
    m_url = info.url
    Logger.log_more_verbose("Looking for output links to malware sites")
    try:
        # Follow redirects if configured, or only the first redirect
        # when this is the initial (depth 0) request.
        allow_redirects = Config.audit_config.follow_redirects or \
            (info.depth == 0 and Config.audit_config.follow_first_redirect)
        p = download(m_url, self.check_download, allow_redirects=allow_redirects)
    except NetworkException as e:  # "except X, e" is Python 2 only
        Logger.log_more_verbose("Error while processing %r: %s" % (m_url, str(e)))
def is_URL_in_windows(self, main_url):
    """
    Detect if platform is Windows or \*NIX. To do this, get the first
    link, in scope, and does two resquest. If are the same response,
    then, platform are Windows. Else are \*NIX.

    :returns: True, if the remote host is a Windows system. False is
        \*NIX or None if unknown.
    :rtype: bool
    """
    # Links containing any of these words are skipped, to avoid
    # accidentally signing out of the audited application.
    m_forbidden = (
        "logout",
        "logoff",
        "exit",
        "sigout",
        "signout",
    )

    # Get the main web page.
    m_r = download(main_url, callback=self.check_download)
    if not m_r or not m_r.raw_data:
        return None
    discard_data(m_r)

    # Get the first link.
    m_links = None
    try:
        if m_r.information_type == Information.INFORMATION_HTML:
            m_links = extract_from_html(m_r.raw_data, main_url)
        else:
            m_links = extract_from_text(m_r.raw_data, main_url)
    except TypeError:
        # The bound exception name was unused; format_exc() captures
        # the full traceback for the log instead.
        Logger.log_error_more_verbose("Plugin error: %s" % format_exc())
        return None
def recv_info(self, info):
    """
    Spider the given URL resource, honoring the configured depth limit.

    :param info: URL resource to spider (project type; assumed to expose
        ``url`` and ``depth`` — confirm against caller).
    :returns: list of discovered results (empty when depth exceeded).
    """
    m_return = []

    m_url = info.url
    m_depth = info.depth

    # Check depth: stop descending once the configured limit is passed.
    if Config.audit_config.depth is not None and m_depth > Config.audit_config.depth:
        Logger.log_more_verbose("Spider depth level exceeded for URL: %s" % m_url)
        return m_return

    Logger.log_verbose("Spidering URL: %r" % m_url)

    # Check if we need to follow the first redirect.
    p = None
    try:
        allow_redirects = Config.audit_config.follow_redirects or \
            (m_depth == 0 and Config.audit_config.follow_first_redirect)
        p = download(m_url, self.check_download, allow_redirects=allow_redirects)
    except NetworkException as e:  # "except X, e" is Python 2 only
        Logger.log_more_verbose("Error while processing %r: %s" % (m_url, str(e)))
def is_URL_in_windows(self, main_url):
    """
    Detect if platform is Windows or \*NIX. To do this, get the first
    link, in scope, and does two resquest. If are the same response,
    then, platform are Windows. Else are \*NIX.

    :returns: True, if the remote host is a Windows system. False is
        \*NIX or None if unknown.
    :rtype: bool
    """
    # Links containing any of these words are skipped, to avoid
    # accidentally signing out of the audited application.
    m_forbidden = (
        "logout",
        "logoff",
        "exit",
        "sigout",
        "signout",
    )

    # Get the main web page.
    m_r = download(main_url, callback=self.check_download)
    if not m_r or not m_r.raw_data:
        return None
    discard_data(m_r)

    # Get the first link.
    m_links = None
    try:
        if m_r.information_type == Information.INFORMATION_HTML:
            m_links = extract_from_html(m_r.raw_data, main_url)
        else:
            m_links = extract_from_text(m_r.raw_data, main_url)
    except TypeError:
        # The bound exception name was unused; format_exc() captures
        # the full traceback for the log instead.
        Logger.log_error_more_verbose("Plugin error: %s" % format_exc())
        return None
def find_htm_file(url):
    """
    Probe for a ``DeveloperMenu.htm`` file relative to *url* and, when it
    responds with HTTP 200, harvest every href target found in its body.

    :param url: base URL to probe against.
    :returns: list of href values extracted from the page (empty when the
        file is absent).
    """
    harvested = []
    for candidate in ['DeveloperMenu.htm']:
        # Drop a leading slash so the join treats the URL as a plain base.
        base = url[1:] if url.startswith("/") else url
        target = urljoin(base, candidate)
        probe = HTTP.get_url(target, use_cache=False, method="GET")
        if probe.status == "200":
            page = download(target)
            harvested = re.findall(r'href=[\'"]?([^\'" >]+)', page.raw_data)
    return harvested
def recv_info(self, info):
    """
    Spider the given URL resource.

    :param info: URL resource to spider (project type; assumed to expose
        ``url`` and ``depth`` — confirm against caller).
    :returns: list of discovered results.
    """
    m_return = []

    m_url = info.url
    Logger.log_verbose("Spidering URL: %r" % m_url)

    # Check if we need to follow the first redirect.
    p = None
    try:
        allow_redirects = Config.audit_config.follow_redirects or \
            (info.depth == 0 and Config.audit_config.follow_first_redirect)
        p = download(m_url, self.check_download, allow_redirects=allow_redirects)
    except NetworkException as e:  # "except X, e" is Python 2 only
        Logger.log_more_verbose("Error while processing %r: %s" % (m_url, str(e)))
def recv_info(self, info):
    """
    Look for a robots.txt file on the target host.

    :param info: resource to analyze (project type; assumed to expose
        ``url`` and ``hostname`` — confirm against caller).
    """
    m_return = []

    m_url = info.url
    m_hostname = info.hostname
    m_url_robots_txt = urljoin(m_url, 'robots.txt')

    p = None
    try:
        msg = "Looking for robots.txt in: %s" % m_hostname
        Logger.log_more_verbose(msg)
        p = download(m_url_robots_txt, self.check_download)
    except NetworkOutOfScope:
        Logger.log_more_verbose("URL out of scope: %s" % (m_url_robots_txt))
        return
    except Exception as e:  # deliberately broad: any fetch failure aborts quietly
        Logger.log_more_verbose("Error while processing %r: %s" % (m_url_robots_txt, str(e)))
        return
def run(self, info):
    """
    Spider the given URL resource.

    :param info: URL resource to spider (project type; assumed to expose
        ``url`` and ``depth`` — confirm against caller).
    :returns: list of discovered results.
    """
    m_return = []

    m_url = info.url
    Logger.log_verbose("Spidering URL: %s" % m_url)

    # Check if we need to follow the first redirect, then follow the link.
    p = None
    try:
        allow_redirects = Config.audit_config.follow_redirects or \
            (info.depth == 0 and Config.audit_config.follow_first_redirect)
        p = download(m_url, self.check_download, allow_redirects=allow_redirects)
    except NetworkException as e:  # "except X, e" is Python 2 only
        Logger.log_error_verbose("Error while processing %r: %s" % (m_url, str(e)))
def find_xml_files(url):
    """
    Probe for known developer XML files relative to *url* and collect the
    object links (``Object/ObjLink`` text) referenced by each one found.

    :param url: base URL to probe against.
    :returns: list of link strings gathered from the XML files.
    """
    collected = []
    for xml_name in ['execute.xml', 'DeveloperMenu.xml']:
        # Drop a leading slash so the join treats the URL as a plain base.
        base = url[1:] if url.startswith("/") else url
        target = urljoin(base, xml_name)
        probe = HTTP.get_url(target, use_cache=False, method="GET")
        if probe.status == "200":
            payload = download(target)
            tree = ET.fromstring(payload.raw_data)
            try:
                for obj in tree.findall('Object'):
                    Logger.log(obj.find('ObjLink').text)
                    collected.append(obj.find('ObjLink').text)
            except Exception:
                # Best-effort parse: malformed entries are skipped silently.
                ##raise # XXX DEBUG
                pass
    return collected
def run(self, info):
    """
    Look for a robots.txt file on the target host.

    :param info: resource to analyze (project type; assumed to expose
        ``url`` and ``hostname`` — confirm against caller).
    """
    m_return = []

    m_url = info.url
    m_hostname = info.hostname
    m_url_robots_txt = urljoin(m_url, 'robots.txt')

    p = None
    try:
        msg = "Looking for robots.txt in: %s" % m_hostname
        Logger.log_more_verbose(msg)
        p = download(m_url_robots_txt, self.check_download)
    except NetworkOutOfScope:
        Logger.log_more_verbose("URL out of scope: %s" % (m_url_robots_txt))
        return
    except Exception as e:  # deliberately broad: any fetch failure aborts quietly
        Logger.log_more_verbose("Error while processing %r: %s" % (m_url_robots_txt, str(e)))
        return
def is_URL_in_windows(self, main_url):
    """
    Detect if platform is Windows or \*NIX.

    Fetches the first in-scope link twice — once with the original URL
    and once uppercased. If both responses are nearly identical, the
    filesystem is case-insensitive, so the platform is assumed to be
    Windows; otherwise \*NIX.

    :returns: True, if the remote host is a Windows system. False is
        \*NIX or None if unknown.
    :rtype: bool
    """
    # Links containing any of these words are skipped, to avoid
    # accidentally signing out of the audited application.
    m_forbidden = (
        "logout",
        "logoff",
        "exit",
        "sigout",
        "signout",
    )

    # Get the main web page. Also bail out when there is no content to
    # parse (consistent with the other variants of this method).
    m_r = download(main_url, callback=self.check_download)
    if not m_r or not m_r.raw_data:
        return None
    discard_data(m_r)

    # Get the links from the page.
    if m_r.information_type == Information.INFORMATION_HTML:
        m_links = extract_from_html(m_r.raw_data, main_url)
    else:
        m_links = extract_from_text(m_r.raw_data, main_url)
    if not m_links:
        return None

    # Get the first link of the page that's in scope of the audit.
    m_first_link = None
    for u in m_links:
        if u in Config.audit_scope and not any(x in u for x in m_forbidden):
            m_first_link = u
            break
    if not m_first_link:
        return None

    # Now make two requests for the link: one with the original URL and
    # another with the uppercase version of it.

    # Original
    m_response_orig = HTTP.get_url(m_first_link, callback=self.check_response)  # FIXME handle exceptions!
    discard_data(m_response_orig)

    # Uppercase
    m_response_upper = HTTP.get_url(m_first_link.upper(), callback=self.check_response)  # FIXME handle exceptions!
    discard_data(m_response_upper)

    # Compare them.
    m_orig_data = m_response_orig.raw_response if m_response_orig else ""
    m_upper_data = m_response_upper.raw_response if m_response_upper else ""
    m_match_level = get_diff_ratio(m_orig_data, m_upper_data)

    # If the responses match by more than 95%, both URLs point to the
    # same resource => Windows; else => *NIX.
    return m_match_level > 0.95
def __get_wordpress_version(self, url):
    """
    This function gets the current version of WordPress and the last
    version available for download.

    :param url: URL of target.
    :type url: str.

    :return: a tuple with (CURRENT_VERSION, LAST_AVAILABLE_VERSION)
    :rtype: tuple(str, str)
    """
    # Fallback URLs and the regex that extracts the version from each.
    url_version = {
        # Generic
        "wp-login.php": r"(;ver=)([0-9\.]+)([\-a-z]*)",

        # For WordPress 3.8
        "wp-admin/css/wp-admin-rtl.css": r"(Version[\s]+)([0-9\.]+)",
        "wp-admin/css/wp-admin.css": r"(Version[\s]+)([0-9\.]+)"
    }

    #
    # Get current version
    #

    # Method 1: parse the version out of readme.html.
    url_current_version = urljoin(url, "readme.html")
    current_version_content_1 = download(url_current_version)

    current_version_method1 = None
    if isinstance(current_version_content_1, HTML):
        m1 = re.search(r"(<br/>[\s]*[vV]ersion[\s]*)([0-9\.]*)",
                       current_version_content_1.raw_data)
        if m1 is not None and len(m1.groups()) == 2:
            current_version_method1 = m1.group(2)

    # Method 2: look for the generator <meta> tag in the main page.
    current_version_content_2 = download(url)

    current_version_method2 = None
    if current_version_content_2:  # guard: download() may return nothing
        m2 = re.search(r"(<meta name=\"generator\" content=\"WordPress[\s]+)([0-9\.]+)",
                       current_version_content_2.raw_data)
        if m2 is not None and len(m2.groups()) == 2:
            current_version_method2 = m2.group(2)

    # Merge the results of both methods. When both found a version and
    # they disagree, the meta generator tag (method 2) wins.
    if current_version_method2 is not None:
        current_version = current_version_method2
    elif current_version_method1 is not None:
        current_version = current_version_method1
    else:
        current_version = "unknown"

    # If the current version was not found, try the fallback URLs.
    if current_version == "unknown":
        for url_pre, regex in url_version.items():
            # URL to find the WordPress version.
            url_current_version = urljoin(url, url_pre)
            current_version_content = download(url_current_version)
            if current_version_content is None:  # guard: nothing downloaded
                continue
            discard_data(current_version_content)

            # Find the version.
            tmp_version = re.search(regex, current_version_content.raw_data)

            if tmp_version is not None:
                current_version = tmp_version.group(2)
                break  # Found -> stop search

    #
    # Get last version
    #

    # URL to get the last version of WordPress available.
    url_last_version = "http://wordpress.org/download/"
    last_version_content = download(url_last_version, allow_out_of_scope=True)

    last_version = "unknown"
    if isinstance(last_version_content, HTML):
        m_last = re.search("(WordPress )([0-9\.]*)", last_version_content.raw_data)
        if m_last is not None and len(m_last.groups()) == 2:
            last_version = m_last.group(2)

    # Discard unused data.
    discard_data(current_version_content_2)
    discard_data(current_version_content_1)
    discard_data(last_version_content)

    return current_version, last_version
def is_URL_in_windows(self, main_url):
    """
    Detect if platform is Windows or \*NIX.

    Fetches the first in-scope link twice — once with the original URL
    and once uppercased. If both responses are nearly identical, the
    filesystem is case-insensitive, so the platform is assumed to be
    Windows; otherwise \*NIX.

    :returns: True, if the remote host is a Windows system. False is
        \*NIX or None if unknown.
    :rtype: bool
    """
    # Links containing any of these words are skipped, to avoid
    # accidentally signing out of the audited application.
    m_forbidden = (
        "logout",
        "logoff",
        "exit",
        "sigout",
        "signout",
    )

    # Get the main web page. Also bail out when there is no content to
    # parse (consistent with the other variants of this method).
    m_r = download(main_url, callback=self.check_download)
    if not m_r or not m_r.raw_data:
        return None
    discard_data(m_r)

    # Get the links from the page.
    if m_r.information_type == Information.INFORMATION_HTML:
        m_links = extract_from_html(m_r.raw_data, main_url)
    else:
        m_links = extract_from_text(m_r.raw_data, main_url)
    if not m_links:
        return None

    # Get the first link of the page that's in scope of the audit.
    m_first_link = None
    for u in m_links:
        if u in Config.audit_scope and not any(x in u for x in m_forbidden):
            m_first_link = u
            break
    if not m_first_link:
        return None

    # Now make two requests for the link: one with the original URL and
    # another with the uppercase version of it.

    # Original
    m_response_orig = HTTP.get_url(
        m_first_link, callback=self.check_response)  # FIXME handle exceptions!
    discard_data(m_response_orig)

    # Uppercase
    m_response_upper = HTTP.get_url(
        m_first_link.upper(), callback=self.check_response)  # FIXME handle exceptions!
    discard_data(m_response_upper)

    # Compare them.
    m_orig_data = m_response_orig.raw_response if m_response_orig else ""
    m_upper_data = m_response_upper.raw_response if m_response_upper else ""
    m_match_level = get_diff_ratio(m_orig_data, m_upper_data)

    # If the responses match by more than 95%, both URLs point to the
    # same resource => Windows; else => *NIX.
    return m_match_level > 0.95
def __get_wordpress_version(self, url):
    """
    This function gets the current version of WordPress and the last
    version available for download.

    :param url: URL of target.
    :type url: str.

    :return: a tuple with (CURRENT_VERSION, LAST_AVAILABLE_VERSION)
    :rtype: tuple(str, str)
    """
    # Fallback URLs and the regex that extracts the version from each.
    url_version = {
        # Generic
        "wp-login.php": r"(;ver=)([0-9\.]+)([\-a-z]*)",

        # For WordPress 3.8
        "wp-admin/css/wp-admin-rtl.css": r"(Version[\s]+)([0-9\.]+)",
        "wp-admin/css/wp-admin.css": r"(Version[\s]+)([0-9\.]+)"
    }

    #
    # Get current version
    #

    # Method 1: parse the version out of readme.html.
    url_current_version = urljoin(url, "readme.html")
    current_version_content_1 = download(url_current_version)

    current_version_method1 = None
    if isinstance(current_version_content_1, HTML):
        m1 = re.search(r"(<br/>[\s]*[vV]ersion[\s]*)([0-9\.]*)",
                       current_version_content_1.raw_data)
        if m1 is not None and len(m1.groups()) == 2:
            current_version_method1 = m1.group(2)

    # Method 2: look for the generator <meta> tag in the main page.
    current_version_content_2 = download(url)

    current_version_method2 = None
    if current_version_content_2:  # guard: download() may return nothing
        m2 = re.search(r"(<meta name=\"generator\" content=\"WordPress[\s]+)([0-9\.]+)",
                       current_version_content_2.raw_data)
        if m2 is not None and len(m2.groups()) == 2:
            current_version_method2 = m2.group(2)

    # Merge the results of both methods. When both found a version and
    # they disagree, the meta generator tag (method 2) wins.
    if current_version_method2 is not None:
        current_version = current_version_method2
    elif current_version_method1 is not None:
        current_version = current_version_method1
    else:
        current_version = "unknown"

    # If the current version was not found, try the fallback URLs.
    if current_version == "unknown":
        for url_pre, regex in url_version.items():
            # URL to find the WordPress version.
            url_current_version = urljoin(url, url_pre)
            current_version_content = download(url_current_version)
            if current_version_content is None:  # guard: nothing downloaded
                continue
            discard_data(current_version_content)

            # Find the version.
            tmp_version = re.search(regex, current_version_content.raw_data)

            if tmp_version is not None:
                current_version = tmp_version.group(2)
                break  # Found -> stop search

    #
    # Get last version
    #

    # URL to get the last version of WordPress available.
    url_last_version = "http://wordpress.org/download/"
    last_version_content = download(url_last_version, allow_out_of_scope=True)

    last_version = "unknown"
    if isinstance(last_version_content, HTML):
        m_last = re.search("(WordPress )([0-9\.]*)", last_version_content.raw_data)
        if m_last is not None and len(m_last.groups()) == 2:
            last_version = m_last.group(2)

    # Discard unused data.
    discard_data(current_version_content_2)
    discard_data(current_version_content_1)
    discard_data(last_version_content)

    return current_version, last_version