Example #1
# Imports assumed for this excerpt (legacy censys-python client and
# project-local helpers).
import json
import logging
import os
import time

import censys.base
from censys import certificates

import utils  # project-local helpers: cache_path, write, json_for, invalid, ...


def paginated_mode(suffix, options, uid, api_key):
    # Cache hostnames in a dict for de-duping.
    hostnames_map = {}

    certificate_api = certificates.CensysCertificates(uid, api_key)

    if 'query' in options and options['query']:
        query = options['query']
    else:
        query = "parsed.subject.common_name:\"%s\" or parsed.extensions.subject_alt_name.dns_names:\"%s\"" % (
            suffix, suffix)
    logging.debug("Censys query:\n%s\n" % query)

    # time to sleep between requests (defaults to 5s)
    delay = int(options.get("delay", 5))

    # Censys page size, fixed
    page_size = 100

    # Start page defaults to 1.
    start_page = int(options.get("start", 1))

    # End page defaults to whatever the API says is the last one.
    end_page = options.get("end", None)
    if end_page is None:
        end_page = get_end_page(query, certificate_api)
        if end_page is None:
            logging.warn("Error looking up number of pages.")
            exit(1)
    else:
        end_page = int(end_page)

    max_records = ((end_page - start_page) + 1) * page_size

    fields = [
        "parsed.subject.common_name",
        "parsed.extensions.subject_alt_name.dns_names"
    ]

    current_page = start_page

    logging.warn("Fetching up to %i records, starting at page %i." %
                 (max_records, start_page))
    last_cached = False
    force = options.get("force", False)

    while current_page <= end_page:
        if (not last_cached) and (current_page > start_page):
            logging.debug("(Waiting %is before fetching page %i.)" %
                          (delay, current_page))
            time.sleep(delay)
        # Reset before each page; the cache branch below re-sets it to True,
        # so only live API fetches trigger the delay above.
        last_cached = False

        logging.debug("Fetching page %i." % current_page)

        cache_page = utils.cache_path(str(current_page), "censys")
        if (force is False) and os.path.exists(cache_page):
            logging.warn("\t[%i] Cached page." % current_page)
            last_cached = True

            with open(cache_page) as f:
                certs = json.load(f)
            # Pages that previously errored were cached as an "invalid"
            # sentinel dict (see the error handling below); skip them.
            if isinstance(certs, dict) and certs.get('invalid'):
                current_page += 1
                continue
        else:
            try:
                certs = list(
                    certificate_api.search(query,
                                           fields=fields,
                                           page=current_page,
                                           max_records=page_size))
                utils.write(utils.json_for(certs), cache_page)
            except censys.base.CensysException:
                logging.warn(utils.format_last_exception())
                logging.warn("Censys error, skipping page %i." % current_page)
                utils.write(utils.invalid({}), cache_page)
                current_page += 1
                continue
            except Exception:
                logging.warn(utils.format_last_exception())
                logging.warn("Unexpected error on page %i, aborting." %
                             current_page)
                utils.write(utils.invalid({}), cache_page)
                exit(1)

        for cert in certs:
            # Common name + SANs
            names = cert.get('parsed.subject.common_name', []) + cert.get(
                'parsed.extensions.subject_alt_name.dns_names', [])
            logging.debug(names)

            for name in names:
                hostnames_map[sanitize_name(name)] = None

        current_page += 1

    logging.debug("Done fetching from API.")

    return hostnames_map
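
The get_end_page helper called above isn't part of this excerpt. A minimal
sketch, assuming the legacy censys-python client exposes a metadata(query)
call whose result includes a "pages" count (the method name and response
shape are assumptions, not confirmed by this excerpt):

def get_end_page(query, certificate_api):
    # Ask the API for search metadata; "pages" is the last page number.
    metadata = certificate_api.metadata(query)
    if metadata is None:
        return None
    return metadata.get('pages')

paginated_mode only relies on the contract that get_end_page returns the
last available page number, or None on error.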
Example #2
# Imports assumed for this excerpt.
import logging
import re

import requests
from urllib import parse as urlparse  # keeps the urlparse.* calls below working on Python 3

import utils  # project-local helpers (format_last_exception, ...)


def basic_check(endpoint):
    logging.debug("pinging %s..." % endpoint.url)

    # First pass at testing the endpoint:
    #
    # * Don't follow redirects. (Will only follow if necessary.)
    #   If it's a 3XX, we'll ping again to follow redirects. This is
    #   necessary to reliably scope any errors (e.g. TLS errors) to
    #   the original endpoint.
    #
    # * Validate certificates. (Will figure out the error if necessary.)
    try:

        req = ping(endpoint.url)

        endpoint.live = True
        if endpoint.protocol == "https":
            endpoint.https_valid = True

    except requests.exceptions.SSLError:
        # Retry with certificate validation disabled.
        try:
            req = ping(endpoint.url, verify=False)
        except requests.exceptions.SSLError:
            # If it's a protocol error or other, it's not live.
            endpoint.live = False
            logging.warn(
                "Unexpected SSL protocol (or other) error during retry.")
            return
        except requests.exceptions.RequestException:
            endpoint.live = False
            logging.warn(
                "Unexpected requests exception during retry. Printing error:")
            logging.warn(utils.format_last_exception())
            return

        # If it was a certificate error of any kind, it's live.
        endpoint.live = True

        # Figure out the error(s).
        https_check(endpoint)

    # And this is the parent of ConnectionError and other things.
    # For example, "too many redirects".
    # See https://github.com/kennethreitz/requests/blob/master/requests/exceptions.py
    except requests.exceptions.RequestException:
        endpoint.live = False
        logging.warn("Unexpected requests exception.")
        return

    # Endpoint is live, analyze the response.
    endpoint.headers = req.headers

    endpoint.status = req.status_code
    if str(endpoint.status).startswith('3'):
        endpoint.redirect = True

    if endpoint.redirect:

        location_header = req.headers.get('Location')

        # A 3XX response without a Location header can't be followed.
        if location_header is None:
            logging.warn("Redirect response with no Location header.")
            return

        # Absolute redirects (e.g. "https://example.com/Index.aspx")
        if location_header.startswith("http:") or location_header.startswith(
                "https:"):
            immediate = location_header

        # Relative redirects (e.g. "Location: /Index.aspx").
        # Construct absolute URI, relative to original request.
        else:
            immediate = urlparse.urljoin(endpoint.url, location_header)

        # Chase down the ultimate destination, ignoring any certificate warnings.
        ultimate_req = None
        try:
            ultimate_req = ping(endpoint.url,
                                allow_redirects=True,
                                verify=False)
        except requests.exceptions.RequestException:
            # Swallow connection errors, but we won't be saving redirect info.
            pass

        # Now establish whether the redirects were:
        # * internal (same exact hostname),
        # * within the zone (any subdomain within the parent domain)
        # * external (on some other parent domain)

        # The hostname of the endpoint (e.g. "www.agency.gov")
        subdomain_original = urlparse.urlparse(endpoint.url).hostname
        # The parent domain of the endpoint (e.g. "agency.gov")
        base_original = parent_domain_for(subdomain_original)

        # The hostname of the immediate redirect.
        subdomain_immediate = urlparse.urlparse(immediate).hostname
        # The parent domain of the immediate redirect.
        base_immediate = parent_domain_for(subdomain_immediate)

        endpoint.redirect_immediately_to = immediate
        # Store a boolean, not the Match object re.match returns.
        endpoint.redirect_immediately_to_www = bool(re.match(
            r'^https?://www\.', immediate))
        endpoint.redirect_immediately_to_https = immediate.startswith(
            "https://")
        endpoint.redirect_immediately_to_http = immediate.startswith("http://")
        endpoint.redirect_immediately_to_external = (base_original !=
                                                     base_immediate)
        endpoint.redirect_immediately_to_subdomain = (
            (base_original == base_immediate)
            and (subdomain_original != subdomain_immediate))

        if ultimate_req is not None:
            # For ultimate destination, use the URL we arrived at,
            # not Location header. Auto-resolves relative redirects.
            eventual = ultimate_req.url

            # The hostname of the eventual destination.
            subdomain_eventual = urlparse.urlparse(eventual).hostname
            # The parent domain of the eventual destination.
            base_eventual = parent_domain_for(subdomain_eventual)

            endpoint.redirect_eventually_to = eventual
            endpoint.redirect_eventually_to_https = eventual.startswith(
                "https://")
            endpoint.redirect_eventually_to_http = eventual.startswith(
                "http://")
            endpoint.redirect_eventually_to_external = (base_original !=
                                                        base_eventual)
            endpoint.redirect_eventually_to_subdomain = (
                (base_original == base_eventual)
                and (subdomain_original != subdomain_eventual))
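
The ping, https_check, and parent_domain_for helpers used by basic_check
are defined elsewhere. As a minimal sketch, ping is plausibly a thin
wrapper over requests.get; the User-Agent and timeout values here are
assumptions for illustration:

USER_AGENT = "example-https-scanner"  # assumed; set a real identifying agent
TIMEOUT = 10  # seconds; assumed default

def ping(url, allow_redirects=False, verify=True):
    # Defaults match how basic_check scopes errors to the original
    # endpoint: don't follow redirects, do validate certificates.
    return requests.get(
        url,
        allow_redirects=allow_redirects,
        verify=verify,
        headers={'User-Agent': USER_AGENT},
        timeout=TIMEOUT
    )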
Example #3
# Imports assumed for this excerpt.
import csv
import datetime
import logging
import os
import time

import censys.base
from censys import export

import utils  # project-local helpers (cache_path, download, ...)


def export_mode(suffix, options, uid, api_key):
    # Cache hostnames in a dict for de-duping.
    hostnames_map = {}

    # Default timeout: 20 minutes. (The original expression computed
    # 20 hours, contradicting its own comment.)
    timeout = int(options.get("timeout", (60 * 20)))

    # Wait 5 seconds between checking on the job.
    between_jobs = 5

    try:
        export_api = export.CensysExport(uid, api_key)
    except censys.base.CensysUnauthorizedException:
        logging.warn(
            "The Censys.io Export API rejected the provided Censys credentials. The credentials may be inaccurate, or you may need to request access from the Censys.io team."
        )
        exit(1)

    # Uses a FLATTEN command in order to work around a BigQuery
    # error around multiple "repeated" fields. *shrug*
    query = "SELECT parsed.subject.common_name, parsed.extensions.subject_alt_name.dns_names from FLATTEN([certificates.certificates], parsed.extensions.subject_alt_name.dns_names) where parsed.subject.common_name LIKE \"%%%s\" OR parsed.extensions.subject_alt_name.dns_names LIKE \"%%%s\";" % (
        suffix, suffix)
    logging.debug("Censys query:\n%s\n" % query)

    download_file = utils.cache_path("export", "censys", ext="csv")

    force = options.get("force", False)

    if (force is False) and os.path.exists(download_file):
        logging.warn("Using cached download data.")
    else:
        logging.warn("Kicking off SQL query job.")
        results_url = None

        try:
            job = export_api.new_job(query, format='csv', flatten=True)
            job_id = job['job_id']

            started = datetime.datetime.now()
            while True:
                elapsed = (datetime.datetime.now() - started).seconds

                status = export_api.check_job(job_id)
                if status['status'] == 'error':
                    logging.warn("Error from Censys: %s" % status['error'])
                    exit(1)

                # Not expected, but better to explicitly handle.
                elif status['status'] == 'expired':
                    logging.warn("Results are somehow expired, bailing.")
                    exit(1)

                elif status['status'] == 'pending':
                    logging.debug("[%is] Job still pending." % elapsed)
                    time.sleep(between_jobs)

                elif status['status'] == 'success':
                    logging.warn("[%is] Job complete!" % elapsed)
                    results_url = status['download_paths'][0]
                    break

                if (elapsed > timeout):
                    logging.warn("Timeout waiting for job to complete.")
                    exit(1)

        except censys.base.CensysException:
            logging.warn(utils.format_last_exception())
            logging.warn("Censys error, aborting.")
            # Bail out here; otherwise results_url stays None and the
            # download below would fail anyway.
            exit(1)

        # At this point, the job is complete and we need to download
        # the resulting CSV URL in results_url.
        logging.warn("Downloading results of SQL query.")
        utils.download(results_url, download_file)

    # Read in downloaded CSV file, run any hostnames in each line
    # through the sanitizer, and de-dupe using the map.
    with open(download_file, newline='') as csvfile:
        for row in csv.reader(csvfile):
            if (not row[0]) or (
                    row[0].lower().startswith("parsed_subject_common_name")):
                continue

            names = [row[0].lower(), row[1].lower()]
            # logging.debug(names)

            for name in names:
                if name:
                    hostnames_map[sanitize_name(name)] = None

    return hostnames_map
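
Both gathering modes run each name through sanitize_name before using it
as a key in hostnames_map. That helper isn't in this excerpt either; a
minimal sketch, assuming it just normalizes certificate names down to
bare hostnames:

def sanitize_name(name):
    # Certificate names can carry wildcard prefixes ("*.agency.gov") and,
    # in CSV exports, stray quote characters; strip both and lowercase.
    return name.lower().replace("*.", "").replace("\"", "").strip()

Either way, the caller gets back hostnames_map, whose keys are the
de-duplicated hostnames (the dict values are unused).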