Example #1
    def extract_rdns_info(self, ip):
        """
        Extract RDNS domain and zone information from the IP address
        """
        rdns_value = self.find_reverse_dns(ip)

        if rdns_value is None:
            return "", None

        if self._ZONES is None:
            self._ZONES = ZoneManager.get_distinct_zones(self.mongo_connector)

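        # Derive the registered (root) domain for the reverse DNS value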
        rdns_zone = ZoneManager.get_root_domain(rdns_value, None)

        return rdns_value, rdns_zone
Example #2
def main():
    """
    Begin Main...
    """
    now = datetime.now()
    print("Starting: " + str(now))

    # Make database connections
    mc_connector = MongoConnector.MongoConnector()

    file_path = "/mnt/workspace/ct_facebook/"

    fb_connector = FacebookConnector.FacebookConnector()
    access_token = fb_connector.get_facebook_access_token()

    zones = ZoneManager.get_distinct_zones(mc_connector)

    for zone in zones:
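        # Pause between zones to spread out requests to the Facebook API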
        time.sleep(15)
        results = fetch_domain(fb_connector, access_token, zone)

        if results is None:
            print("ERROR looking up: " + zone)
            continue

        print(zone + ": " + str(len(results)))

        for result in results:
            with open(file_path + zone + "_" + result['id'] + ".pem", "w") as cert_f:
                cert_f.write(result['certificate_pem'])

    now = datetime.now()
    print("Complete: " + str(now))
Example #3
def main():
    """
    Begin Main...
    """
    logger = LoggingUtil.create_log(__name__)

    now = datetime.now()
    print ("Starting: " + str(now))
    logger.info("Starting...")

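    # DNS record types mapped to their IANA resource record type numbers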
    dns_types = {"a": 1, "ns": 2, "cname": 5, "soa": 6, "ptr": 12, "hinfo": 13, "mx": 15, "txt": 16, "aaaa": 28, "srv": 33, "naptr": 35, "ds": 43, "rrsig": 46, "dnskey": 48}

    mongo_connector = MongoConnector.MongoConnector()
    all_dns_collection = mongo_connector.get_all_dns_connection()
    jobs_manager = JobsManager.JobsManager(mongo_connector, 'marinus_dns')
    jobs_manager.record_job_start()

    dns_manager = DNSManager.DNSManager(mongo_connector)

    zones = ZoneManager.get_distinct_zones(mongo_connector)

    google_dns = GoogleDNS.GoogleDNS()

    for zone in zones:
        time.sleep(1)
        for dtype, dnum in dns_types.items():
            result = google_dns.fetch_DNS_records(zone, dnum)

            if result == []:
                logger.debug("No " + dtype + " records found for " + zone)
            else:
                new_record = result[0]
                new_record['status'] = 'confirmed'
                new_record['zone'] = zone
                new_record['created'] = datetime.now()
                logger.debug("Found " + dtype + " for: " + zone)
                dns_manager.insert_record(new_record, "marinus")

    logger.info("Starting SOA Search")

    soa_searches = find_sub_zones(all_dns_collection)
    for entry in soa_searches:
        time.sleep(1)
        result = google_dns.fetch_DNS_records(entry, dns_types['soa'])
        if result != []:
            new_record = result[0]
            new_record['status'] = 'confirmed'
            new_record['zone'] = get_fld_from_value(entry, '')
            new_record['created'] = datetime.now()
            logger.debug ("Found SOA: " + entry)
            if new_record['zone'] != '':
                dns_manager.insert_record(new_record, "marinus")

    jobs_manager.record_job_complete()

    now = datetime.now()
    print ("Complete: " + str(now))
    logger.info("Complete.")
Example #4
def main():
    """
    Begin Main...
    """
    now = datetime.now()
    print("Starting: " + str(now))

    mongo_connector = MongoConnector.MongoConnector()
    dns_manager = DNSManager.DNSManager(mongo_connector)
    jobs_collection = mongo_connector.get_jobs_connection()
    zone_ingestor = ZoneIngestor.ZoneIngestor()

    current_zones = ZoneManager.get_distinct_zones(mongo_connector)

    # For cases with multiple R53 accounts, include the account id for reference
    sts = boto3.client('sts')
    account_id = sts.get_caller_identity()["Arn"].split(':')[4]
    r53_source = "R53:" + str(account_id)

    r53_client = boto3.client('route53')

    r53_domains = r53_client.list_hosted_zones()
    r53_zone_list = []
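    # Page through the hosted zones; Route 53 truncates long listings and returns a NextMarker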
    while r53_domains != {}:
        for zone_data in r53_domains['HostedZones']:
            # Only add public zones
            if not zone_data['Config']['PrivateZone']:
                r53_zone_list.append(zone_data)

        if r53_domains['IsTruncated']:
            r53_domains = r53_client.list_hosted_zones(
                Marker=r53_domains['NextMarker'])
        else:
            r53_domains = {}

    for zone_data in r53_zone_list:
        # Double check that this is not a new zone
        zone_name = zone_data['Name'][:-1]
        if zone_name not in current_zones:
            print("Creating zone: " + zone_name)
            zone_ingestor.add_zone(zone_data['Name'], r53_source)

        # Add hosts to the zone
        update_records(r53_client, dns_manager, zone_data, r53_source)

    # Record status
    jobs_collection.update_one({'job_name': 'get_route53'}, {
        '$currentDate': {
            "updated": True
        },
        "$set": {
            'status': 'COMPLETE'
        }
    })

    now = datetime.now()
    print("Ending: " + str(now))
Example #5
def main():
    """
    Begin Main...
    """
    logger = LoggingUtil.create_log(__name__)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    mongo_connector = MongoConnector.MongoConnector()
    dns_manager = DNSManager.DNSManager(mongo_connector)
    zone_ingestor = ZoneIngestor.ZoneIngestor()

    jobs_manager = JobsManager.JobsManager(mongo_connector, "get_route53")
    jobs_manager.record_job_start()

    current_zones = ZoneManager.get_distinct_zones(mongo_connector)

    # For cases with multiple R53 accounts, include the account id for reference
    sts = boto3.client("sts")
    account_id = sts.get_caller_identity()["Arn"].split(":")[4]
    r53_source = "R53:" + str(account_id)

    r53_client = boto3.client("route53")

    r53_domains = r53_client.list_hosted_zones()
    r53_zone_list = []
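    # Page through the hosted zones; Route 53 truncates long listings and returns a NextMarker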
    while r53_domains != {}:
        for zone_data in r53_domains["HostedZones"]:
            # Only add public zones
            if not zone_data["Config"]["PrivateZone"]:
                r53_zone_list.append(zone_data)

        if r53_domains["IsTruncated"]:
            r53_domains = r53_client.list_hosted_zones(
                Marker=r53_domains["NextMarker"])
        else:
            r53_domains = {}

    for zone_data in r53_zone_list:
        # Double check that this is not a new zone
        zone_name = zone_data["Name"][:-1]
        if zone_name not in current_zones:
            logger.info("Creating zone: " + zone_name)
            zone_ingestor.add_zone(zone_data["Name"], r53_source)

        # Add hosts to the zone
        update_records(r53_client, dns_manager, zone_data, r53_source)

    # Record status
    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Ending: " + str(now))
    logger.info("Complete.")
Example #6
def main():
    """
    Begin Main...
    """
    now = datetime.now()
    print("Starting: " + str(now))

    # Create an instance of the VirusTotal class
    vt_instance = VirusTotal.VirusTotal()

    # Get collections for the queries
    mongo_connector = MongoConnector.MongoConnector()
    vt_collection = mongo_connector.get_virustotal_connection()

    jobs_manager = JobsManager.JobsManager(mongo_connector,
                                           'get_virustotal_data')
    jobs_manager.record_job_start()

    # Collect the list of tracked TLDs
    zones = ZoneManager.get_distinct_zones(mongo_connector)

    # For each tracked TLD
    for zone in zones:
        print("Checking " + zone)
        results = vt_instance.get_domain_report(zone)

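        # response_code of -1 means VT rejected the query; 0 means the domain is unknown to VT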
        if results is None:
            print("Error querying zone " + zone)
        elif results['response_code'] == -1:
            print("VT unhappy with " + zone)
        elif results['response_code'] == 0:
            print("VT doesn't have " + zone)
        else:
            print("Matched " + zone)

            results['zone'] = zone
            results['created'] = datetime.now()

            # Mongo doesn't allow key names with periods in them
            # Re-assign to an undotted key name
            if "Dr.Web category" in results:
                results['Dr Web category'] = results.pop("Dr.Web category")

            vt_collection.delete_one({"zone": zone})
            vt_collection.insert_one(results)

        # This sleep command is so that we don't exceed the daily limit on the free API
        # This setting results in this script taking several days to complete
        time.sleep(25)

    # Record status
    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Complete: " + str(now))
Example #7
    def __ultradns_zone_info_response_handler(self, response):
        """
        Handles the API response. Incorrect JSON parsing is tolerated up to 20 times, after
        which the script exits.
        :param response: Response object
        """
        try:
            response_data = response.json()
            record_sets = response_data["rrSets"]
        except (ValueError, AttributeError) as err:
            if self.UH.incorrect_response_json_allowed > 0:
                self._logger.warning(
                    "Unable to parse response JSON for zone " +
                    self.zone_queried)
                self.UH.incorrect_response_json_allowed -= 1
            else:
                self.APIH.handle_api_error(
                    "Unable to parse response JSON for 20 zones: " + repr(err),
                    self.UH.jobs_manager,
                )
        else:
            for record in record_sets:
                dns_info = dict()
                # The ownerName could be either the FQDN or a relative domain name.
                # In case it is a FQDN it will end in '.'
                fqdn = record["ownerName"] + "." + self.zone_queried
                if record["ownerName"].endswith("."):
                    fqdn = record["ownerName"][:-1]

                # A get_root_domain lookup is performed because UDNS supports sub-zones
                dns_info["zone"] = ZoneManager.get_root_domain(
                    self.zone_queried)
                dns_info["fqdn"] = fqdn
                dns_info["type"] = record["rrtype"].split(" ")[0].lower()
                dns_info["status"] = "unknown"

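                # Record each rdata value, skipping private IPs for A and PTR records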
                for dns in record["rdata"]:
                    if dns_info["type"] in ["a", "ptr"]:
                        try:
                            if IPAddress(dns).is_private():
                                continue
                        except AddrFormatError as err:
                            self._logger.warning("For " + fqdn +
                                                 " encountered: " + str(err))
                            continue

                    if dns_info["type"] not in ["soa", "txt"] and dns.endswith("."):
                        dns = dns[:-1]
                    dns_info["value"] = dns
                    dns_info["created"] = datetime.now()
                    self.DNS_MGR.insert_record(dns_info.copy(), self.UH.source)

            self.UH.set_offset(response_data["resultInfo"])
Example #8
def main():
    """
    Begin Main...
    """
    logger = LoggingUtil.create_log(__name__)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    mongo_connector = MongoConnector.MongoConnector()
    dns_manager = DNSManager.DNSManager(mongo_connector)
    jobs_manager = JobsManager.JobsManager(mongo_connector,
                                           "get_external_cnames")
    jobs_manager.record_job_start()

    groups = {}

    # Collect zones
    zone_results = ZoneManager.get_distinct_zones(mongo_connector)

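    # Keep only fully qualified zones (names containing at least one dot)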
    zones = []
    for zone in zone_results:
        if zone.find(".") >= 0:
            zones.append(zone)

    # Collect the all_dns cnames.
    logger.info("Starting All DNS...")
    all_dns_recs = dns_manager.find_multiple({"type": "cname"}, None)

    for srec in all_dns_recs:
        if not is_tracked_zone(srec["value"], zones):
            add_to_list(
                get_fld_from_value(srec["value"], srec["zone"]),
                srec["fqdn"],
                srec["value"],
                srec["zone"],
                groups,
            )

    # Update the database
    tpds_collection = mongo_connector.get_tpds_connection()

    tpds_collection.delete_many({})
    for key in groups.keys():
        tpds_collection.insert_one(groups[key])

    # Record status
    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Ending: " + str(now))
    logger.info("Complete.")
Example #9
def main():
    now = datetime.now()
    print ("Starting: " + str(now))

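    # DNS record types mapped to their IANA resource record type numbers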
    dns_types = {"a": 1, "ns": 2, "cname": 5, "soa": 6, "ptr": 12, "hinfo": 13, "mx": 15, "txt": 16, "aaaa": 28, "srv": 33, "naptr": 35, "ds": 43, "rrsig": 46, "dnskey": 48}

    mongo_connector = MongoConnector.MongoConnector()
    all_dns_collection = mongo_connector.get_all_dns_connection()
    jobs_collection = mongo_connector.get_jobs_connection()

    dns_manager = DNSManager.DNSManager(mongo_connector)

    zones = ZoneManager.get_distinct_zones(mongo_connector)

    google_dns = GoogleDNS.GoogleDNS()

    for zone in zones:
        time.sleep(1)
        for dtype, dnum in dns_types.items():
            result = google_dns.fetch_DNS_records(zone, dnum)

            if result == []:
                print("No " + dtype + " records found for " + zone)
            else:
                new_record = result[0]
                new_record['status'] = 'confirmed'
                new_record['zone'] = zone
                new_record['created'] = datetime.now()
                print("Found " + dtype + " for: " + zone)
                dns_manager.insert_record(new_record, "marinus")

    print("Starting SOA Search")

    soa_searches = find_sub_zones(all_dns_collection)
    for entry in soa_searches:
        time.sleep(1)
        result = google_dns.fetch_DNS_records(entry, dns_types['soa'])
        if result != []:
            new_record = result[0]
            new_record['status'] = 'confirmed'
            new_record['zone'] = get_fld_from_value(entry, '')
            new_record['created'] = datetime.now()
            print ("Found SOA: " + entry)
            if new_record['zone'] != '':
                dns_manager.insert_record(new_record, "marinus")

    jobs_collection.update_one({'job_name': 'marinus_dns'},
                               {'$currentDate': {"updated" : True},
                                "$set": {'status': 'COMPLETE'}})

    now = datetime.now()
    print ("Complete: " + str(now))
Example #10
def main():
    """
    Begin Main...
    """

    now = datetime.now()
    print("Starting: " + str(now))

    mongo_connector = MongoConnector.MongoConnector()
    dns_manager = DNSManager.DNSManager(mongo_connector)

    jobs_collection = mongo_connector.get_jobs_connection()

    groups = {}

    # Collect zones
    zone_results = ZoneManager.get_distinct_zones(mongo_connector)

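    # Keep only fully qualified zones (names containing at least one dot)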
    zones = []
    for zone in zone_results:
        if zone.find(".") >= 0:
            zones.append(zone)

    # Collect the all_dns cnames.
    print("Starting All DNS...")
    all_dns_recs = dns_manager.find_multiple({'type': 'cname'}, None)

    for srec in all_dns_recs:
        if not is_tracked_zone(srec['value'], zones):
            add_to_list(get_fld_from_value(srec['value'], srec['zone']),
                        srec['fqdn'], srec['value'], srec['zone'], groups)

    # Update the database
    tpds_collection = mongo_connector.get_tpds_connection()

    tpds_collection.delete_many({})
    for key in groups.keys():
        tpds_collection.insert_one(groups[key])

    # Record status
    jobs_collection.update_one({'job_name': 'get_external_cnames'}, {
        '$currentDate': {
            "updated": True
        },
        "$set": {
            'status': 'COMPLETE'
        }
    })

    now = datetime.now()
    print("Ending: " + str(now))
Example #11
    def get_infoblox_extattr(self):
        """
        Extracts the zones from the zone collection to query Infoblox. API calls continue
        for each zone until next_page_id is None, indicating there are no further results.
        Once all of the data has been retrieved, stale data for the zone and record type is purged.
        """
        zones = ZoneManager.get_zones_by_source(self.MC, 'Infoblox')
        for zone in zones:
            self.zone_queried = zone
            self.next_page_id = None
            self.__get_previous_records()
            self.__infoblox_paginated_request()
            while self.next_page_id:
                self.__infoblox_paginated_request()
            self.IH.clean_collection(self.previous_records, self.iblox_extattr_collection)
Example #12
def main():
    """
    Begin Main...
    """
    logger = LoggingUtil.create_log(__name__)

    if is_running(os.path.basename(__file__)):
        logger.warning("Already running...")
        exit(0)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    r7 = Rapid7.Rapid7()

    mongo_connection = MongoConnector.MongoConnector()
    dns_manager = DNSManager.DNSManager(mongo_connection)
    ip_manager = IPManager.IPManager(mongo_connection)
    rdns_collection = mongo_connection.get_sonar_reverse_dns_connection()

    zones = ZoneManager.get_distinct_zones(mongo_connection)
    logger.info("Zone length: " + str(len(zones)))

    save_directory = "./files/"

    parser = argparse.ArgumentParser(
        description='Parse Sonar files based on CIDRs.')
    parser.add_argument('--sonar_file_type',
                        required=True,
                        help='Specify "dns" or "rdns"')
    args = parser.parse_args()

    check_save_location(save_directory)

    # A session is necessary for the multi-step log-in process
    s = requests.Session()

    if args.sonar_file_type == "rdns":
        jobs_manager = JobsManager.JobsManager(mongo_connection,
                                               'get_data_by_cidr_rdns')
        jobs_manager.record_job_start()

        try:
            html_parser = r7.find_file_locations(s, "rdns", jobs_manager)
            if html_parser.rdns_url == "":
                logger.error("Unknown Error")
                jobs_manager.record_job_error()
                exit(0)

            unzipped_rdns = download_remote_files(logger, s,
                                                  html_parser.rdns_url,
                                                  save_directory, jobs_manager)
            update_rdns(logger, unzipped_rdns, rdns_collection, dns_manager,
                        ip_manager, zones)
        except Exception as ex:
            logger.error("Unexpected error: " + str(ex))
            jobs_manager.record_job_error()
            exit(0)

        logger.info("RDNS Complete")
        jobs_manager.record_job_complete()

    elif args.sonar_file_type == "dns":
        jobs_manager = JobsManager.JobsManager(mongo_connection,
                                               'get_data_by_cidr_dns')
        jobs_manager.record_job_start()

        try:
            html_parser = r7.find_file_locations(s, "fdns", jobs_manager)
            if html_parser.any_url != "":
                unzipped_dns = download_remote_files(logger, s,
                                                     html_parser.any_url,
                                                     save_directory,
                                                     jobs_manager)
                update_dns(logger, unzipped_dns, dns_manager, ip_manager,
                           zones)
            if html_parser.a_url != "":
                unzipped_dns = download_remote_files(logger, s,
                                                     html_parser.a_url,
                                                     save_directory,
                                                     jobs_manager)
                update_dns(logger, unzipped_dns, dns_manager, ip_manager,
                           zones)
            if html_parser.aaaa_url != "":
                unzipped_dns = download_remote_files(logger, s,
                                                     html_parser.aaaa_url,
                                                     save_directory,
                                                     jobs_manager)
                update_dns(logger, unzipped_dns, dns_manager, ip_manager,
                           zones)
        except Exception as ex:
            logger.error("Unexpected error: " + str(ex))

            jobs_manager.record_job_error()
            exit(0)

        logger.info("DNS Complete")

        jobs_manager.record_job_complete()

    else:
        logger.error("Unrecognized sonar_file_type option. Exiting...")

    now = datetime.now()
    print("Complete: " + str(now))
    logger.info("Complete.")
Example #13
def main():
    """
    Begin Main...
    """
    logger = LoggingUtil.create_log(__name__)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    mongo_connector = MongoConnector.MongoConnector()
    dns_manager = DNSManager.DNSManager(mongo_connector)
    google_dns = GoogleDNS.GoogleDNS()
    jobs_manager = JobsManager.JobsManager(mongo_connector,
                                           'extract_ssl_domains')
    jobs_manager.record_job_start()

    parser = argparse.ArgumentParser(
        description='Search TLS certificates for additional DNS names')
    parser.add_argument('--zgrab_version',
                        default=2,
                        type=int,
                        choices=[1, 2],
                        metavar="version",
                        help='The version of ZGrab used to collect data')
    args = parser.parse_args()

    dns_names = []
    round_two = []

    zones = ZoneManager.get_distinct_zones(mongo_connector)

    # Collect the list of domains from the SSL Certificates
    extract_ct_certificate_names(dns_names, mongo_connector)
    # extract_censys_certificate_names(dns_names, mongo_connector)
    if args.zgrab_version == 1:
        extract_zgrab_certificate_names(logger, dns_names, mongo_connector)
    else:
        extract_zgrab2_certificate_names(logger, dns_names, mongo_connector)

    input_list = []

    # Some SSL certificates are for multiple domains.
    # The tracked company may not own all domains.
    # Therefore, we filter to only the root domains that belong to the tracked company.
    logger.info("Pre-filter list: " + str(len(dns_names)))
    for hostname in dns_names:
        if not hostname.startswith("*"):
            zone = get_tracked_zone(hostname, zones)
            if zone is not None:
                ips = google_dns.fetch_DNS_records(hostname)

                # Pause to prevent DoS-ing of Google's HTTPS DNS Service
                time.sleep(1)

                if ips != []:
                    for ip_addr in ips:
                        temp_zone = get_tracked_zone(ip_addr['fqdn'], zones)
                        if temp_zone is not None:
                            record = {"fqdn": ip_addr['fqdn']}
                            record['zone'] = temp_zone
                            record['created'] = datetime.now()
                            record['type'] = ip_addr['type']
                            record['value'] = ip_addr['value']
                            record['status'] = 'unknown'
                            input_list.append(record)

                        if ip_addr['type'] == "cname" and is_tracked_zone(
                                ip_addr['value'], zones):
                            add_to_round_two(ip_addr['value'], round_two)

                else:
                    logger.warning("Failed IP Lookup for: " + hostname)
            else:
                logger.warning("Failed match on zone for: " + hostname)
        else:
            logger.warning("Skipping wildcard: " + hostname)

    dead_dns_collection = mongo_connector.get_dead_dns_connection()

    # Some DNS records will be CNAME records pointing to other tracked domains.
    # This is a single level recursion to lookup those domains.
    logger.info("Round Two list: " + str(len(round_two)))
    for hostname in round_two:
        zone = get_tracked_zone(hostname, zones)
        if zone is not None:
            ips = google_dns.fetch_DNS_records(hostname)
            time.sleep(1)
            if ips != []:
                for ip_addr in ips:
                    temp_zone = get_tracked_zone(ip_addr['fqdn'], zones)
                    if temp_zone is not None:
                        record = {"fqdn": ip_addr['fqdn']}
                        record['zone'] = temp_zone
                        record['created'] = datetime.now()
                        record['type'] = ip_addr['type']
                        record['value'] = ip_addr['value']
                        record['status'] = 'unknown'
                        input_list.append(record)
            else:
                logger.warning("Failed IP Lookup for: " + hostname)
                original_record = dns_manager.find_one({"fqdn": hostname},
                                                       "ssl")
                if original_record is not None:
                    original_record.pop("_id")
                    dead_dns_collection.insert_one(original_record)
        else:
            logger.warning("Failed match on zone for: " + hostname)

    # Record all the results.
    dns_manager.remove_by_source("ssl")
    logger.info("List length: " + str(len(input_list)))
    for final_result in input_list:
        dns_manager.insert_record(final_result, "ssl")

    # Record status
    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Ending: " + str(now))
    logger.info("Complete")
Example #14
def main():
    """
    Begin Main...
    """
    logger = LoggingUtil.create_log(__name__)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    # Obtain the list of known email addresses from the config collection
    mongo_connector = MongoConnector.MongoConnector()
    whois_collection = mongo_connector.get_whois_connection()
    all_dns_collection = mongo_connector.get_all_dns_connection()
    zones_collection = mongo_connector.get_zone_connection()
    jobs_manager = JobsManager.JobsManager(mongo_connector, 'mark_expired')
    jobs_manager.record_job_start()

    # Grab all zones that are not expired or false positives
    # Also exclude any that were recently created since they won't have data yet
    date_delta = datetime.today() - timedelta(days=30)
    zones = zones_collection.distinct(
        'zone', {
            'created': {
                "$lt": date_delta
            },
            'status': {
                "$nin": [ZoneManager.EXPIRED, ZoneManager.FALSE_POSITIVE]
            }
        })

    # The Python Whois library is hit and miss with some international zones.
    # For now, this script focuses on the most popular TLDs.
    new_zones = get_primary_zones(logger, zones)

    expired_list = []
    for zone in new_zones:
        if whois_collection.find({'zone': zone}).count() == 0:
            # Assume it is expired if there is no longer a whois record present
            expired_list.append(zone)

    for zone in list(expired_list):  # iterate over a copy since entries may be removed
        if all_dns_collection.find({'zone': zone}).count() > 0:
            # This may be a case where the Python Whois library failed
            # and the zone is still active.
            logger.debug("DNS records still exist for " + zone)
            expired_list.remove(zone)

    zone_manager = ZoneManager(mongo_connector)

    # Need to get this list before setting zones to expired in order to avoid a recursion problem.
    already_expired = zone_manager.get_zones_by_status(ZoneManager.EXPIRED)

    possibly_renewed = []
    for zone in already_expired:
        if whois_collection.find({'zone': zone}).count() == 1:
            possibly_renewed.append(zone)

    for zone in expired_list:
        logger.debug("Expiring: " + zone)
        zone_manager.set_status(zone, ZoneManager.EXPIRED, "mark_expired.py")

    # Get the list of known registering entities.
    # This will only work for some whois lookups since Python Whois doesn't get
    # a valid org for all lookups and some have privacy enabled.
    config_collection = mongo_connector.get_config_connection()
    result = config_collection.find({}, {
        'Whois_Orgs': 1,
        'Whois_Name_Servers': 1
    })
    orgs = result[0]['Whois_Orgs']
    name_servers = []
    if 'Whois_Name_Servers' in result[0]:
        name_servers = result[0]['Whois_Name_Servers']

    logger.debug(str(name_servers))

    for zone in possibly_renewed:
        # We need to be careful of automatically marking something renewed
        # since it could have been registered by someone else.
        if whois_collection.find({
                'zone': zone,
                'org': {
                    "$in": orgs
                }
        }).count() == 1:
            logger.warning("ATTENTION: " + zone +
                           " has been renewed based on org")
            zone_manager.set_status(zone, ZoneManager.UNCONFIRMED,
                                    "mark_expired.py")
        else:
            result = whois_collection.find({'zone': zone}, {
                'name_servers': 1,
                "_id": 0
            })
            found = 0
            if result is not None and 'name_servers' in result[0] and result[
                    0]['name_servers'] is not None:
                for entry in result[0]['name_servers']:
                    if entry.lower() in name_servers:
                        logger.warning(
                            "ATTENTION: " + zone +
                            " has been renewed based on name servers")
                        zone_manager.set_status(zone, ZoneManager.UNCONFIRMED,
                                                "mark_expired.py")
                        found = 1
                        break
            if found == 0:
                result = whois_collection.find({'zone': zone}, {
                    'name_server_groups': 1,
                    "_id": 0
                })
                if result is not None and 'name_server_groups' in result[
                        0] and result[0]['name_server_groups'] is not None:
                    for entry in result[0]['name_server_groups']:
                        if entry.lower() in name_servers:
                            logger.warning(
                                "ATTENTION: " + zone +
                                " has been renewed based on name server_groups"
                            )
                            zone_manager.set_status(zone,
                                                    ZoneManager.UNCONFIRMED,
                                                    "mark_expired.py")
                            found = 1
                            break
            if found == 0:
                logger.warning(zone + " has been renewed by an unknown entity")

    # Record status
    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Ending: " + str(now))
    logger.info("Complete.")
Example #15
def main():
    """
    Begin Main...
    """
    logger = LoggingUtil.create_log(__name__)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    # Make database connections
    mongo_connector = MongoConnector.MongoConnector()
    ct_collection = mongo_connector.get_certificate_transparency_connection()
    config_collection = mongo_connector.get_config_connection()
    x509parser = X509Parser.X509Parser()

    zones = ZoneManager.get_distinct_zones(mongo_connector)
    result = config_collection.find_one({}, {'SSL_Orgs': 1, "_id": 0})
    ssl_orgs = result['SSL_Orgs']

    # Defaults
    save_location = '/mnt/workspace/'
    download_method = 'dbAndSave'
    save_type = "PEM"

    parser = argparse.ArgumentParser(
        description='Download certificate information from the provide CT Log.'
    )
    parser.add_argument(
        '--log_source',
        required=True,
        help=
        'Indicates which log to query based on values in the x509Parser library'
    )
    parser.add_argument(
        '--include_precerts',
        action="store_true",
        help='Include pre-certificates which are not finalized')
    parser.add_argument(
        '--download_methods',
        choices=['dbAndSave', 'dbOnly'],
        default=download_method,
        help=
        'Indicates whether to download the raw files or just save to the database'
    )
    parser.add_argument(
        '--starting_index',
        required=False,
        default=-1,
        type=int,
        help='Force the script to start at specific index within the log.')
    parser.add_argument(
        '--cert_save_location',
        required=False,
        default=save_location,
        help=
        'Indicates where to save the certificates on disk when choosing dbAndSave'
    )
    parser.add_argument(
        '--save_type',
        choices=['PEM', 'ASN1'],
        default=save_type,
        help='Indicates which format to use for the data. The default is PEM')
    args = parser.parse_args()

    source = args.log_source
    try:
        ct_log_map = x509parser.CT_LOG_MAP[source]
    except KeyError:
        logger.error("ERROR: UNKNOWN LOG SOURCE: " + source)
        exit(1)

    if args.cert_save_location:
        save_location = args.cert_save_location
        if not save_location.endswith("/"):
            save_location = save_location + "/"

    if args.download_methods:
        download_method = args.download_methods
        check_save_location(save_location, source)

    if args.save_type:
        save_type = args.save_type

    jobs_manager = JobsManager.JobsManager(mongo_connector, "ct_log-" + source)
    jobs_manager.record_job_start()

    if args.starting_index == -1:
        starting_index = fetch_starting_index(ct_collection, source)
    else:
        starting_index = args.starting_index
    logger.info("Starting Index: " + str(starting_index))

    sth_data = fetch_sth(logger, "https://" + ct_log_map['url'])
    logger.info("Tree size: " + str(sth_data['tree_size']))

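    # Walk the log from the starting index up to the tree head, fetching entries 256 at a time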
    current_index = starting_index
    while current_index < sth_data['tree_size']:
        ending_index = current_index + 256
        if ending_index > sth_data['tree_size']:
            ending_index = sth_data['tree_size']

        logger.debug("Checking from index: " + str(current_index) +
                     " to index " + str(ending_index))
        certs = fetch_certificate_batch(logger, "https://" + ct_log_map['url'],
                                        current_index, ending_index)

        for entry in certs['entries']:
            der_cert, cert_type = get_cert_from_leaf(logger,
                                                     entry['leaf_input'])
            if der_cert is None and cert_type == 1 and not args.include_precerts:
                current_index = current_index + 1
                continue
            elif der_cert is None and cert_type == 0:
                current_index = current_index + 1
                continue
            elif der_cert is None and cert_type == 1:
                der_cert = get_cert_from_extra_data(entry['extra_data'])

            cert = x509parser.parse_data(der_cert, source)
            if cert is None:
                logger.warning("Skipping certificate index: " +
                               str(current_index))
                current_index = current_index + 1
                continue

            if cert_type == 1:
                cert['ct_log_type'] = "PRE-CERTIFICATE"
            else:
                cert['ct_log_type'] = "CERTIFICATE"

            cert_zones = check_zone_relevancy(cert, zones)

            if check_org_relevancy(cert, ssl_orgs) or cert_zones != []:
                cert[source + "_id"] = current_index
                cert['zones'] = cert_zones
                logger.info("Adding " + source + " id: " + str(current_index) +
                            " SHA256: " + cert['fingerprint_sha256'])
                insert_certificate(cert, source, ct_collection, cert_zones)

                if download_method == 'dbAndSave':
                    write_file(logger, cert, save_location, save_type, source)

            current_index = current_index + 1

    # Set isExpired for any entries that have recently expired.
    ct_collection.update_many(
        {
            "not_after": {
                "$lt": datetime.utcnow()
            },
            "isExpired": False
        }, {"$set": {
            "isExpired": True
        }})

    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Ending: " + str(now))
    logger.info("Complete.")
Example #16
def main():
    global global_exit_flag
    global global_retest_list
    global global_sleep_time
    global global_queue_size

    global_retest_list = []

    parser = argparse.ArgumentParser(description='Launch zgrab against IPs using port 22, 25, 443, or 465.')
    parser.add_argument('-p',  choices=['443','22', '25', '465'], metavar="port", help='The port to scan: 22, 25, 443, or 465')
    parser.add_argument('-t',  default=5, type=int, metavar="threadCount", help='The number of threads')
    parser.add_argument('--mx', action="store_true", help='Scan only IPs from MX records. Useful for SMTP scans.')
    parser.add_argument('-s',  default=0, type=int, metavar="sleepTime", help='Sleep time in order to spread out the batches')
    parser.add_argument('--qs',  default=0, type=int, metavar="queueSize", help='How many hosts to scan in a batch')
    parser.add_argument('--zones_only', action="store_true", help='Scan only IPs from IP zones.')
    args = parser.parse_args()

    if args.p is None:
        print("A port value (22, 25, 443, or 465) must be provided.")
        exit(0)

    if is_running(os.path.basename(__file__)):
        """
        Check to see if a previous attempt to parse is still running...
        """
        now = datetime.now()
        print(str(now) + ": I am already running! Goodbye!")
        exit(0)

    now = datetime.now()
    print("Starting: " + str(now))

    rm_connector = RemoteMongoConnector.RemoteMongoConnector()
    all_dns_collection = rm_connector.get_all_dns_connection()

    zones_struct = {}
    zones_struct['zones'] = ZoneManager.get_distinct_zones(rm_connector)

    zones_struct['ip_zones'] = get_ip_zones(rm_connector)

    # Collect the list of AWS CIDRs
    zones_struct['aws_ips'] = get_aws_ips(rm_connector)

    # Collect the list of Azure CIDRs
    zones_struct['azure_ips'] = get_azure_ips(rm_connector)

    if args.mx:
        (ips, ip_context) = get_mx_ips(zones_struct['zones'], all_dns_collection)
    elif args.zones_only:
        (ips, ip_context) = get_only_ipzones(zones_struct['ip_zones'])
    else:
        (ips, ip_context) = get_ips(zones_struct['ip_zones'], all_dns_collection)

    if args.s and int(args.s) > 0:
        global_sleep_time = int(args.s)

    if args.qs and int(args.qs) > 0:
        global_queue_size = int(args.qs)

    print("Got IPs: " + str(len(ips)))
    zones_struct['ip_context'] = ip_context

    zgrab_collection = rm_connector.get_zgrab_port_data_connection()
    if args.p == "443":
        run_command = run_port_443_command
    elif args.p == "22":
        run_command = run_port_22_command
    elif args.p == "25":
        run_command = run_port_25_command
    elif args.p == "465":
        run_command = run_port_465_command

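    # Spin up worker threads that pull IPs from the shared work queue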
    threads = []

    print ("Creating " + str(args.t) + " threads")
    for thread_id in range (1, args.t + 1):
        thread = ZgrabThread(thread_id, global_work_queue, args.p, run_command, zones_struct, zgrab_collection)
        thread.start()
        threads.append(thread)
        thread_id += 1

    print("Populating Queue")
    global_queue_lock.acquire()
    for ip in ips:
        global_work_queue.put(ip)
    global_queue_lock.release()

    # Wait for queue to empty
    while not global_work_queue.empty():
        pass

    # Notify threads it's time to exit
    global_exit_flag = 1

    # Wait for all threads to complete
    for t in threads:
        t.join()

    print ("Exiting Main Thread")

    print("Global retest list: " + str(len(global_retest_list)))

    # Retest any SMTP hosts that did not respond to the StartTLS handshake
    if args.p == "25" and len(global_retest_list) > 0:
        process_thread(global_retest_list, args.p, run_port_25_no_tls_command, zones_struct, zgrab_collection, "retest")


    # Remove old entries from before the scan
    if args.p == "443":
        other_results = zgrab_collection.find({'data.tls': {"$exists": True}, 'data.tls.timestamp': {"$lt": now}})
        for result in other_results:
            zgrab_collection.update_one({"_id": ObjectId(result['_id'])}, {"$unset": {'data.tls': ""}})
    elif args.p == "22":
        other_results = zgrab_collection.find({'data.xssh': {"$exists": True}, 'data.xssh.timestamp': {"$lt": now}})
        for result in other_results:
            zgrab_collection.update_one({"_id": ObjectId(result['_id'])}, {"$unset": {'data.xssh': ""}})
    elif args.p == "25":
        other_results = zgrab_collection.find({'data.smtp': {"$exists": True}, 'data.smtp.timestamp': {"$lt": now}})
        for result in other_results:
            zgrab_collection.update_one({"_id": ObjectId(result['_id'])}, {"$unset": {'data.smtp': ""}})
    elif args.p == "465":
        other_results = zgrab_collection.find({'data.smtps': {"$exists": True}, 'data.smtps.timestamp': {"$lt": now}})
        for result in other_results:
            zgrab_collection.update_one({"_id": ObjectId(result['_id'])}, {"$unset": {'data.smtps': ""}})

    # Remove any completely empty entries
    zgrab_collection.delete_many({'data': {}})

    now = datetime.now()
    print("Complete: " + str(now))
Example #17
def main():
    """
    Begin Main...
    """

    now = datetime.now()
    print("Starting: " + str(now))

    mongo_connector = MongoConnector.MongoConnector()
    dns_manager = DNSManager.DNSManager(mongo_connector)
    jobs_manager = JobsManager.JobsManager(mongo_connector,
                                           'extract_mx_domains')
    google_dns = GoogleDNS.GoogleDNS()

    jobs_manager.record_job_start()

    dns_names = []
    round_two = []

    zones = ZoneManager.get_distinct_zones(mongo_connector)

    # Collect the list of domains from the MX Records
    extract_mx_names(dns_names, dns_manager)

    input_list = []

    # Some MX records point to the third-party domains.
    # Therefore, we filter to only the root domains that belong to the tracked company.
    print("Pre-filter list: " + str(len(dns_names)))
    for hostname in dns_names:
        zone = get_tracked_zone(hostname, zones)
        if zone is not None:
            ips = google_dns.fetch_DNS_records(hostname)

            # Pause to prevent DoS-ing of Google's HTTPS DNS Service
            time.sleep(1)

            if ips != []:
                for ip_addr in ips:
                    temp_zone = get_tracked_zone(ip_addr['fqdn'], zones)
                    if temp_zone is not None:
                        record = {"fqdn": ip_addr['fqdn']}
                        record['zone'] = temp_zone
                        record['created'] = datetime.now()
                        record['type'] = ip_addr['type']
                        record['value'] = ip_addr['value']
                        record['status'] = 'unknown'
                        input_list.append(record)

                    if ip_addr['type'] == "cname" and is_tracked_zone(
                            ip_addr['value'], zones):
                        add_to_round_two(ip_addr['value'], round_two)
            else:
                print("Failed IP Lookup for: " + hostname)
        else:
            print("Failed match on zone for: " + hostname)

    dead_dns_collection = mongo_connector.get_dead_dns_connection()

    # Some DNS records will be CNAME records pointing to other tracked domains.
    # This is a single level recursion to lookup those domains.
    print("Round Two list: " + str(len(round_two)))
    for hostname in round_two:
        zone = get_tracked_zone(hostname, zones)
        if zone is not None:
            ips = google_dns.fetch_DNS_records(hostname)
            time.sleep(1)
            if ips != []:
                for ip_addr in ips:
                    temp_zone = get_tracked_zone(ip_addr['fqdn'], zones)
                    if temp_zone is not None:
                        record = {"fqdn": ip_addr['fqdn']}
                        record['zone'] = temp_zone
                        record['created'] = datetime.now()
                        record['type'] = ip_addr['type']
                        record['value'] = ip_addr['value']
                        record['status'] = 'unknown'
                        input_list.append(record)
            else:
                print("Failed IP Lookup for: " + hostname)
                original_record = dns_manager.find_one({"fqdn": hostname},
                                                       "mx")
                if original_record is not None:
                    original_record.pop("_id")
                    dead_dns_collection.insert_one(original_record)
        else:
            print("Failed match on zone for: " + hostname)

    # Record all the results.
    dns_manager.remove_by_source("mx")
    print("List length: " + str(len(input_list)))
    for final_result in input_list:
        dns_manager.insert_record(final_result, "mx")

    # Record status
    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Ending: " + str(now))
Example #18
def main():
    """
    Begin Main...
    """
    now = datetime.now()
    print("Starting: " + str(now))

    # Obtain the list of known email addresses from the config collection
    mongo_connector = MongoConnector.MongoConnector()
    whois_collection = mongo_connector.get_whois_connection()
    all_dns_collection = mongo_connector.get_all_dns_connection()
    zones_collection = mongo_connector.get_zone_connection()
    jobs_collection = mongo_connector.get_jobs_connection()

    # Grab all zones that are not expired or false positives
    # Also exclude any that were recently created since they won't have data yet
    date_delta = datetime.today() - timedelta(days=30)
    zones = zones_collection.distinct('zone', {'created': {"$lt": date_delta}, 'status': {"$nin": [ZoneManager.EXPIRED, ZoneManager.FALSE_POSITIVE]}})

    # The Python Whois library is hit and miss with some international zones.
    # For now, this script focuses on the most popular TLDs.
    new_zones = get_primary_zones(zones)

    expired_list = []
    for zone in new_zones:
        if whois_collection.find({'zone': zone}).count() == 0:
            # Assume it is expired if there is no longer a whois record present
            expired_list.append(zone)

    for zone in list(expired_list):  # iterate over a copy since entries may be removed
        if all_dns_collection.find({'zone': zone}).count() > 0:
            # This may be a case where the Python Whois library failed
            # and the zone is still active.
            print("DNS records still exist for " + zone)
            expired_list.remove(zone)

    zone_manager = ZoneManager(mongo_connector)

    # Need to get this list before setting zones to expired in order to avoid a recursion problem.
    already_expired = zone_manager.get_zones_by_status(ZoneManager.EXPIRED)

    possibly_renewed = []
    for zone in already_expired:
        if whois_collection.find({'zone': zone}).count() == 1:
            possibly_renewed.append(zone)


    for zone in expired_list:
        print("Expiring: " + zone)
        zone_manager.set_status(zone, ZoneManager.EXPIRED, "mark_expired.py")


    # Get the list of known registering entities.
    # This will only work for some whois lookups since Python Whois doesn't get
    # a valid org for all lookups and some have privacy enabled.
    config_collection = mongo_connector.get_config_connection()
    result = config_collection.find({}, {'Whois_Orgs': 1})
    orgs = result[0]['Whois_Orgs']

    for zone in possibly_renewed:
        # We need to be careful of automatically marking something renewed
        # since it could have been registered by someone else.
        if whois_collection.find({'zone': zone, 'org': {"$in": orgs}}).count() == 1:
            print("ATTENTION: " + zone + "has been renewed")
            zone_manager.set_status(zone, ZoneManager.UNCONFIRMED, "mark_expired.py")
        else:
            print("WARNING: " + zone + " has been renewed by an unknown entity")


    # Record status
    jobs_collection.update_one({'job_name': 'mark_expired'},
                               {'$currentDate': {"updated": True},
                                "$set": {'status': 'COMPLETE'}})


    now = datetime.now()
    print("Ending: " + str(now))
Example #19
def main():
    """
    Begin Main()
    """

    now = datetime.now()
    print("Starting: " + str(now))

    mongo_connector = MongoConnector.MongoConnector()
    jobs_collection = mongo_connector.get_jobs_connection()
    mongo_ct = mongo_connector.get_certificate_transparency_connection()
    cert_graphs_collection = mongo_connector.get_cert_graphs_connection()

    zones = ZoneManager.get_distinct_zones(mongo_connector)

    parser = argparse.ArgumentParser(description='Creates and stores certificate graphs in the database based on one or more sources.')
    parser.add_argument('--check_censys', action='store_true', default=False, required=False, help='Whether to check the Censys collection in the database')
    parser.add_argument('--check_443_scans', action='store_true', default=False, required=False, help='Whether to check the zgrab collection in the database')
    parser.add_argument('--check_ct_scans', action='store_true', default=False, required=False, help='Whether to check the CT collection in the database')
    args = parser.parse_args()

    if args.check_censys is True:
        censys_collection = mongo_connector.get_censys_connection()

    if args.check_443_scans is True:
        zgrab_collection = mongo_connector.get_zgrab_443_data_connection()

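    # Build a directed certificate graph for each zone from the selected sources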
    for zone in zones:
        print("Creating: " + zone)
        graph = nx.DiGraph()

        certs_list = {}

        if args.check_ct_scans:
            certs_list = get_current_ct_certificates(mongo_ct, zone)
        if args.check_censys:
            certs_list = add_censys_certificates(censys_collection, zone, certs_list)
        if args.check_443_scans:
            certs_list = add_terminal_zgrab_certificates(zgrab_collection, zone, certs_list)
            certs_list = add_initial_zgrab_certificates(zgrab_collection, zone, certs_list)

        graph = create_nodes(graph, mongo_connector, zone, certs_list)
        data = json_graph.node_link_data(graph)

        my_data = {}
        my_data['links'] = data['links']
        my_data['nodes'] = data['nodes']
        my_data['zone'] = zone
        my_data['created'] = datetime.now()

        cert_graphs_collection.delete_one({'zone': zone})
        cert_graphs_collection.insert_one(my_data)

    # Record status
    jobs_collection.update_one({'job_name': 'create_cert_graphs'},
                               {'$currentDate': {"updated": True},
                                "$set": {'status': 'COMPLETE'}})

    now = datetime.now()
    print("Ending: " + str(now))
Example #20
def main():
    """
    Begin Main...
    """
    global global_exit_flag
    global global_zgrab_path

    logger = LoggingUtil.create_log(__name__)

    parser = argparse.ArgumentParser(
        description='Launch zgrab against domains using port 80 or 443.')
    parser.add_argument('-p',
                        choices=['443', '80'],
                        metavar="port",
                        help='The web port: 80 or 443')
    parser.add_argument('-t',
                        default=5,
                        type=int,
                        metavar="threadCount",
                        help='The number of threads')
    parser.add_argument('--zgrab_path',
                        default=global_zgrab_path,
                        metavar='zgrabVersion',
                        help='The version of ZGrab to use')
    args = parser.parse_args()

    if args.p is None:
        logger.error("A port value (80 or 443) must be provided.")
        exit(1)

    if is_running(os.path.basename(__file__)):
        """
        Check to see if a previous attempt to parse is still running...
        """
        now = datetime.now()
        logger.warning(str(now) + ": I am already running! Goodbye!")
        exit(0)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    rm_connector = RemoteMongoConnector.RemoteMongoConnector()
    all_dns_collection = rm_connector.get_all_dns_connection()
    jobs_manager = JobsManager.JobsManager(rm_connector,
                                           "zgrab_http_domain-" + args.p)
    jobs_manager.record_job_start()

    if args.p == "443":
        zgrab_collection = rm_connector.get_zgrab_443_data_connection()
        run_command = run_port_443_command
    else:
        zgrab_collection = rm_connector.get_zgrab_80_data_connection()
        run_command = run_port_80_command

    check_save_location("./json_p" + args.p)

    global_zgrab_path = args.zgrab_path

    zones = ZoneManager.get_distinct_zones(rm_connector)
    ip_manager = IPManager.IPManager(rm_connector)

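    # Scan each zone's domains with a fresh thread pool per zone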
    for zone in zones:
        global_exit_flag = 0

        domains = get_domains(all_dns_collection, ip_manager, zone)

        if len(domains) == 0:
            continue

        num_threads = args.t
        if len(domains) < args.t:
            num_threads = len(domains)

        logger.debug("Creating " + str(num_threads) + " threads")

        threads = []
        for thread_id in range(1, num_threads + 1):
            thread = ZgrabThread(thread_id, global_work_queue, args.p,
                                 run_command, zone, zgrab_collection)
            thread.start()
            threads.append(thread)

        logger.debug(zone + " length: " + str(len(domains)))

        logger.info("Populating Queue")
        global_queue_lock.acquire()
        for domain in domains:
            global_work_queue.put(domain)
        global_queue_lock.release()

        # Wait for queue to empty
        while not global_work_queue.empty():
            pass

        logger.info("Queue empty")
        # Notify threads it's time to exit
        global_exit_flag = 1

        # Wait for all threads to complete
        for t in threads:
            t.join()

    # Remove last week's old entries
    lastweek = datetime.now() - timedelta(days=7)
    zgrab_collection.delete_many({
        'domain': {
            "$ne": "<nil>"
        },
        'timestamp': {
            "$lt": lastweek
        }
    })

    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Complete: " + str(now))
    logger.info("Complete.")
Example #21
def main():
    """
    Begin Main()
    """
    logger = LoggingUtil.create_log(__name__)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    mongo_connector = MongoConnector.MongoConnector()
    mongo_ct = mongo_connector.get_certificate_transparency_connection()
    cert_graphs_collection = mongo_connector.get_cert_graphs_connection()
    jobs_manager = JobsManager.JobsManager(mongo_connector,
                                           "create_cert_graphs")
    jobs_manager.record_job_start()

    zones = ZoneManager.get_distinct_zones(mongo_connector)

    parser = argparse.ArgumentParser(
        description=
        "Creates and stores certificate graphs in the database based on one or more sources."
    )
    parser.add_argument(
        "--check_censys",
        action="store_true",
        default=False,
        required=False,
        help="Whether to check the Censys collection in the database",
    )
    parser.add_argument(
        "--check_443_scans",
        action="store_true",
        default=False,
        required=False,
        help="Whether to check the zgrab collection in the database",
    )
    parser.add_argument(
        "--check_ct_scans",
        action="store_true",
        default=False,
        required=False,
        help="Whether to check the CT collection in the database",
    )
    parser.add_argument(
        "--zgrab_version",
        default=2,
        type=int,
        choices=[1, 2],
        metavar="version",
        help="The version of ZGrab used to collect data",
    )
    args = parser.parse_args()

    if args.check_censys:
        censys_collection = mongo_connector.get_censys_connection()

    if args.check_443_scans:
        zgrab_collection = mongo_connector.get_zgrab_443_data_connection()

    for zone in zones:
        logger.info("Creating: " + zone)
        graph = nx.DiGraph()

        certs_list = {}

        if args.check_ct_scans:
            certs_list = get_current_ct_certificates(mongo_ct, zone)
        if args.check_censys:
            certs_list = add_censys_certificates(censys_collection, zone,
                                                 certs_list)
        if args.check_443_scans:
            if args.zgrab_version == 1:
                certs_list = add_terminal_zgrab_certificates(
                    zgrab_collection, zone, certs_list)
                certs_list = add_initial_zgrab_certificates(
                    zgrab_collection, zone, certs_list)
            else:
                certs_list = add_terminal_zgrab2_certificates(
                    zgrab_collection, zone, certs_list)
                certs_list = add_initial_zgrab2_certificates(
                    zgrab_collection, zone, certs_list)

        graph = create_nodes(graph, mongo_connector, zone, certs_list)
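        # node_link_data() serializes the DiGraph into a dict with "nodes"
        # and "links" lists (the shape D3.js force layouts consume).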
        data = json_graph.node_link_data(graph)

        my_data = {}
        my_data["links"] = data["links"]
        my_data["nodes"] = data["nodes"]
        my_data["zone"] = zone
        my_data["created"] = datetime.now()

        cert_graphs_collection.delete_one({"zone": zone})
        mongo_connector.perform_insert(cert_graphs_collection, my_data)

    # Record status
    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Ending: " + str(now))
    logger.info("Complete.")
Exemplo n.º 22
0
def main():
    """
    Begin Main...
    """
    global global_exit_flag
    global global_retest_list
    global global_sleep_time
    global global_queue_size
    global global_zgrab_path

    logger = LoggingUtil.create_log(__name__)

    global_retest_list = []

    parser = argparse.ArgumentParser(
        description="Launch zgrab against IPs using port 22, 25, 443, or 465.")
    parser.add_argument(
        "-p",
        choices=["22", "25", "443", "465"],
        metavar="port",
        help="The port to scan: 22, 25, 443, or 465",
    )
    parser.add_argument("-t",
                        default=5,
                        type=int,
                        metavar="threadCount",
                        help="The number of threads")
    parser.add_argument(
        "--mx",
        action="store_true",
        help="Scan only IPs from MX records. Useful for SMTP scans.",
    )
    parser.add_argument(
        "-s",
        default=0,
        type=int,
        metavar="sleepTime",
        help="Sleep time in order to spread out the batches",
    )
    parser.add_argument(
        "--qs",
        default=0,
        type=int,
        metavar="queueSize",
        help="How many hosts to scan in a batch",
    )
    parser.add_argument("--zones_only",
                        action="store_true",
                        help="Scan only IPs from IP zones.")
    parser.add_argument(
        "--zgrab_path",
        default=global_zgrab_path,
        metavar="zgrabVersion",
        help="The version of ZGrab to use",
    )
    args = parser.parse_args()

    if args.p is None:
        logger.error("A port value (22, 25, 443, or 465) must be provided.")
        exit(1)

    if is_running(os.path.basename(__file__)):
        """
        Check to see if a previous attempt to parse is still running...
        """
        now = datetime.now()
        logger.warning(str(now) + ": I am already running! Goodbye!")
        exit(0)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    rm_connector = RemoteMongoConnector.RemoteMongoConnector()
    all_dns_collection = rm_connector.get_all_dns_connection()
    ip_manager = IPManager.IPManager(rm_connector, True)

    jobs_manager = JobsManager.JobsManager(rm_connector,
                                           "zgrab_port_ip-" + args.p)
    jobs_manager.record_job_start()

    zones_struct = {}
    zones_struct["zones"] = ZoneManager.get_distinct_zones(rm_connector)

    # Not pretty, but it lets the worker threads share the IP manager
    # through zones_struct
    zones_struct["ip_manager"] = ip_manager

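    # Select the IP population: MX-record IPs only, tracked CIDRs only,
    # or every known IP (the default).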
    if args.mx:
        (ips, ip_context) = get_mx_ips(zones_struct["zones"], ip_manager,
                                       all_dns_collection)
    elif args.zones_only:
        (ips, ip_context) = get_only_ipzones(ip_manager.Tracked_CIDRs)
    else:
        (ips, ip_context) = get_ips(ip_manager, all_dns_collection)

    if args.s and int(args.s) > 0:
        global_sleep_time = int(args.s)

    if args.qs and int(args.qs) > 0:
        global_queue_size = int(args.qs)

    logger.info("Got IPs: " + str(len(ips)))
    zones_struct["ip_context"] = ip_context

    zgrab_collection = rm_connector.get_zgrab_port_data_connection()
    if args.p == "443":
        run_command = run_port_443_command
    elif args.p == "22":
        run_command = run_port_22_command
    elif args.p == "25":
        run_command = run_port_25_command
    elif args.p == "465":
        run_command = run_port_465_command

    check_save_location("./json_p" + args.p)

    global_zgrab_path = args.zgrab_path

    threads = []

    logger.debug("Creating " + str(args.t) + " threads")
    for thread_id in range(1, args.t + 1):
        thread = ZgrabThread(
            thread_id,
            global_work_queue,
            args.p,
            run_command,
            zones_struct,
            zgrab_collection,
        )
        thread.start()
        threads.append(thread)

    logger.info("Populating Queue")
    global_queue_lock.acquire()
    for ip in ips:
        global_work_queue.put(ip)
    global_queue_lock.release()

    # Wait for queue to empty
    while not global_work_queue.empty():
        pass

    # Notify threads it's time to exit
    global_exit_flag = 1

    # Wait for all threads to complete
    for t in threads:
        t.join()

    logger.info("Exiting Main Thread")

    logger.info("Global retest list: " + str(len(global_retest_list)))

    # Retest any SMTP hosts that did not respond to the StartTLS handshake
    if args.p == "25" and len(global_retest_list) > 0:
        process_thread(
            logger,
            global_retest_list,
            args.p,
            run_port_25_no_tls_command,
            zones_struct,
            zgrab_collection,
            "retest",
        )

    # Remove old entries from before the scan
    if args.p == "443":
        other_results = zgrab_collection.find({
            "data.tls": {
                "$exists": True
            },
            "data.tls.timestamp": {
                "$lt": now
            }
        })
        for result in other_results:
            zgrab_collection.update_one({"_id": ObjectId(result["_id"])},
                                        {"$unset": {
                                            "data.tls": ""
                                        }})
    elif args.p == "22":
        if "zgrab2" in global_zgrab_path:
            other_results = zgrab_collection.find({
                "data.ssh": {
                    "$exists": True
                },
                "data.ssh.timestamp": {
                    "$lt": now
                }
            })
            for result in other_results:
                zgrab_collection.update_one({"_id": ObjectId(result["_id"])},
                                            {"$unset": {
                                                "data.ssh": ""
                                            }})
        else:
            other_results = zgrab_collection.find({
                "data.xssh": {
                    "$exists": True
                },
                "data.xssh.timestamp": {
                    "$lt": now
                }
            })
            for result in other_results:
                zgrab_collection.update_one({"_id": ObjectId(result["_id"])},
                                            {"$unset": {
                                                "data.xssh": ""
                                            }})
    elif args.p == "25":
        other_results = zgrab_collection.find({
            "data.smtp": {
                "$exists": True
            },
            "data.smtp.timestamp": {
                "$lt": now
            }
        })
        for result in other_results:
            zgrab_collection.update_one({"_id": ObjectId(result["_id"])},
                                        {"$unset": {
                                            "data.smtp": ""
                                        }})
    elif args.p == "465":
        other_results = zgrab_collection.find({
            "data.smtps": {
                "$exists": True
            },
            "data.smtps.timestamp": {
                "$lt": now
            }
        })
        for result in other_results:
            zgrab_collection.update_one({"_id": ObjectId(result["_id"])},
                                        {"$unset": {
                                            "data.smtps": ""
                                        }})

    # Remove entries left with an empty "data" document after the unsets above
    zgrab_collection.delete_many({"data": {}})

    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Complete: " + str(now))
    logger.info("Complete.")
Exemplo n.º 23
0
def main():
    """
    Begin main...
    """
    parser = argparse.ArgumentParser(
        description='Search the Common Crawl graph dataset for new domains')
    parser.add_argument('--url',
                        metavar="URL",
                        help='The URL for the latest vertices file')
    args = parser.parse_args()

    if args.url is not None:
        CURRENT_FILE_LIST = args.url

    now = datetime.now()
    print("Starting: " + str(now))

    mongo_connector = MongoConnector.MongoConnector()
    dns_manager = DNSManager.DNSManager(mongo_connector)
    jobs_collection = mongo_connector.get_jobs_connection()

    reversed_zones = ZoneManager.get_reversed_zones(mongo_connector)

    alphabet = list(string.digits + string.ascii_lowercase)

    # Create a dictionary of the zones grouped by their first letter
    # This will allow us to reduce the number of comparisons in the alphabetized CC files.
    grouped_zones = {}
    for letter in alphabet:
        grouped_zones[letter] = []

    for zone in reversed_zones:
        first_letter = zone[0]
        grouped_zones[first_letter].append(zone)
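    # The zones come back label-reversed (e.g. "example.org" as
    # "org.example") to match the reversed-host names in the vertex files.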

    compressed_download_list = download_file(CURRENT_FILE_LIST)
    subprocess.check_call(["gunzip", "-f", compressed_download_list])

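    # gunzip drops the trailing ".gz", so rebuild the decompressed filename
    # by stripping the final extension from the downloaded name.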
    download_list = compressed_download_list.split(".")[:-1]
    list_file = ".".join(download_list)

    vertices_file_entries = open(list_file, "r")

    for entry in vertices_file_entries:
        # Download file
        vert_file_url = "http://commoncrawl.s3.amazonaws.com/" + entry.rstrip(
            "\n")
        compressed_vertices_file = download_file(vert_file_url)

        # Decompress file
        subprocess.check_call(["gunzip", "-f", compressed_vertices_file])
        vertices_list = compressed_vertices_file.split(".")[:-1]
        vertices_file = ".".join(vertices_list)

        # Get the first and last line of the file
        (first_line, last_line) = get_first_and_last_line(vertices_file)

        # Get the first and last domain
        parts = first_line.split("\t")
        first_domain = parts[1].rstrip("\n")
        first_char = first_domain[0]

        parts = last_line.split("\t")
        last_domain = parts[1].rstrip("\n")
        last_char = last_domain[0]

        # Get the list of zones relevant to that range
        searchable_zones = get_zone_sublist(first_char, last_char,
                                            grouped_zones)

        # Parse file and insert matches
        parse_file(vertices_file, searchable_zones, dns_manager)
        subprocess.check_call(["rm", vertices_file])

    # Remove all entries more than two months old
    # Note: This is commented out because Common Crawl graph data is not additive.
    # dns_manager.remove_all_by_source_and_date("common_crawl", -4)

    jobs_collection.update_one({'job_name': 'common_crawl_graph'}, {
        '$currentDate': {
            "updated": True
        },
        "$set": {
            'status': 'COMPLETE'
        }
    })

    now = datetime.now()
    print("Ending: " + str(now))
Exemplo n.º 24
0
def main():
    """
    Begin Main...
    """
    now = datetime.now()
    print("Starting: " + str(now))

    azure_connector = AzureConnector.AzureConnector()
    mongo_connector = MongoConnector.MongoConnector()
    dns_manager = DNSManager.DNSManager(mongo_connector)
    zone_ingestor = ZoneIngestor.ZoneIngestor()
    jobs_manager = JobsManager.JobsManager(mongo_connector, 'fetch_azure_dns')
    jobs_manager.record_job_start()

    current_zones = ZoneManager.get_distinct_zones(mongo_connector)

    resource_client = azure_connector.get_resources_client()
    resources = []

    # The resource list is not currently used.
    for item in resource_client.resource_groups.list():
        resources.append(item.name)

    dns_client = azure_connector.get_dns_client()

    zones = dns_client.zones.list()

    # The record types that Azure DNS lets you configure, mapped to their
    # attribute names on the record-set objects
    record_types = {
        'A': 'arecords',
        'AAAA': 'aaaa_records',
        'MX': 'mx_records',
        'NS': 'ns_records',
        'PTR': 'ptr_records',
        'SRV': 'srv_records',
        'TXT': 'txt_records',
        'CNAME': 'cname_record',
        'SOA': 'soa_record'
    }

    for zone in zones:
        print("Zone: " + zone.name)
        data = split_id(zone.id)
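        # split_id() presumably splits the Azure resource id path into a
        # dict of its segments (e.g. a "resourceGroups" entry).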

        if zone.zone_type == ZoneType.public:
            print(zone.name + " is public:")

            if zone.name not in current_zones:
                print("Creating zone: " + zone.name)
                zone_ingestor.add_zone(zone.name,
                                       "azure:" + data["resourceGroups"])

            try:
                print("ResourceGroup: " + data["resourceGroups"])
                records = dns_client.record_sets.list_all_by_dns_zone(
                    data["resourceGroups"], zone.name)
                for entry in records:
                    # The record_data id value ends in rtype/rvalue so you must guess the rtype
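                    # e.g. an id ending in ".../dnszones/example.org/A/www"
                    # (illustrative) would match rtype "A".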
                    record_data = split_id(entry.id)
                    for rtype in record_types:
                        if rtype in record_data:
                            results = extract_record_set_value(rtype, entry)
                            for result in results:
                                result['zone'] = zone.name
                                result['created'] = datetime.now()
                                result['status'] = 'confirmed'
                                dns_manager.insert_record(
                                    result, "azure:" + data["resourceGroups"])
            except Exception:
                print("No records found")

    jobs_manager.record_job_complete()
Exemplo n.º 25
0
def main():
    """
    Begin Main...
    """
    global global_exit_flag
    global global_zgrab_path

    logger = LoggingUtil.create_log(__name__)

    parser = argparse.ArgumentParser(
        description="Launch zgrab against IPs using port 80 or 443.")
    parser.add_argument("-p",
                        choices=["443", "80"],
                        metavar="port",
                        help="The web port: 80 or 443")
    parser.add_argument("-t",
                        default=5,
                        type=int,
                        metavar="threadCount",
                        help="The number of threads")
    parser.add_argument(
        "--zgrab_path",
        default=global_zgrab_path,
        metavar="zgrabVersion",
        help="The version of ZGrab to use",
    )
    args = parser.parse_args()

    if args.p is None:
        logger.error("A port value (80 or 443) must be provided.")
        exit(1)

    if is_running(os.path.basename(__file__)):
        """
        Check to see if a previous attempt to parse is still running...
        """
        now = datetime.now()
        logger.warning(str(now) + ": I am already running! Goodbye!")
        exit(0)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    rm_connector = RemoteMongoConnector.RemoteMongoConnector()
    all_dns_collection = rm_connector.get_all_dns_connection()
    ip_manager = IPManager.IPManager(rm_connector, True)

    jobs_manager = JobsManager.JobsManager(rm_connector,
                                           "zgrab_http_ip-" + args.p)
    jobs_manager.record_job_start()

    zones_struct = {}
    zones_struct["zones"] = ZoneManager.get_distinct_zones(rm_connector)

    # Not pretty, but it keeps the shared state the threads need in one
    # structure
    zones_struct["ip_manager"] = ip_manager

    (ips, ip_context) = get_ips(ip_manager, all_dns_collection)
    logger.info("Got IPs: " + str(len(ips)))
    zones_struct["ip_context"] = ip_context

    if args.p == "443":
        zgrab_collection = rm_connector.get_zgrab_443_data_connection()
        run_command = run_port_443_command
    else:
        zgrab_collection = rm_connector.get_zgrab_80_data_connection()
        run_command = run_port_80_command

    check_save_location("./json_p" + args.p)

    global_zgrab_path = args.zgrab_path

    threads = []

    logger.debug("Creating " + str(args.t) + " threads")
    for thread_id in range(1, args.t + 1):
        thread = ZgrabThread(
            thread_id,
            global_work_queue,
            args.p,
            run_command,
            zones_struct,
            zgrab_collection,
        )
        thread.start()
        threads.append(thread)

    logger.info("Populating Queue")
    global_queue_lock.acquire()
    for ip in ips:
        global_work_queue.put(ip)
    global_queue_lock.release()

    # Wait for queue to empty
    while not global_work_queue.empty():
        pass

    # Notify threads it's time to exit
    global_exit_flag = 1

    # Wait for all threads to complete
    for t in threads:
        t.join()

    logger.info("Exiting Main Thread")

    # Remove last week's old entries
    lastweek = datetime.now() - timedelta(days=7)
    zgrab_collection.delete_many({
        "ip": {
            "$ne": "<nil>"
        },
        "timestamp": {
            "$lt": lastweek
        }
    })

    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Complete: " + str(now))
    logger.info("Complete.")
Exemplo n.º 26
0
def main():
    """
    Begin Main...
    """
    logger = LoggingUtil.create_log(__name__)

    if is_running(os.path.basename(__file__)):
        logger.warning("Already running...")
        exit(0)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    parser = argparse.ArgumentParser(
        description="Parse Sonar files based on domain zones.")
    parser.add_argument(
        "--sonar_file_type",
        choices=["dns-any", "dns-a", "rdns"],
        required=True,
        help='Specify "dns-any", "dns-a", or "rdns"',
    )
    parser.add_argument(
        "--database",
        choices=["local", "remote"],
        required=False,
        default="local",
        help="Whether to use the local or remote DB",
    )
    args = parser.parse_args()

    r7 = Rapid7.Rapid7()

    if args.database == "remote":
        mongo_connector = RemoteMongoConnector.RemoteMongoConnector()
        dns_manager = DNSManager.DNSManager(mongo_connector,
                                            "get_sonar_data_dns")
    else:
        mongo_connector = MongoConnector.MongoConnector()
        dns_manager = DNSManager.DNSManager(mongo_connector)

    zones = ZoneManager.get_distinct_zones(mongo_connector)

    save_directory = "./files/"
    check_save_location(save_directory)

    # A session is necessary for the multi-step log-in process
    s = requests.Session()

    if args.sonar_file_type == "rdns":
        logger.info("Updating RDNS records")
        jobs_manager = JobsManager.JobsManager(mongo_connector,
                                               "get_sonar_data_rdns")
        jobs_manager.record_job_start()

        try:
            html_parser = r7.find_file_locations(s, "rdns", jobs_manager)
            if html_parser.rdns_url == "":
                logger.error("Unknown Error")
                jobs_manager.record_job_error()
                exit(0)

            unzipped_rdns = download_remote_files(logger, s,
                                                  html_parser.rdns_url,
                                                  save_directory, jobs_manager)
            update_rdns(logger, unzipped_rdns, zones, dns_manager,
                        mongo_connector)
        except Exception as ex:
            logger.error("Unexpected error: " + str(ex))
            jobs_manager.record_job_error()
            exit(0)

        jobs_manager.record_job_complete()
    elif args.sonar_file_type == "dns-any":
        logger.info("Updating DNS ANY records")

        jobs_manager = JobsManager.JobsManager(mongo_connector,
                                               "get_sonar_data_dns-any")
        jobs_manager.record_job_start()

        try:
            html_parser = r7.find_file_locations(s, "fdns", jobs_manager)
            if html_parser.any_url != "":
                unzipped_dns = download_remote_files(logger, s,
                                                     html_parser.any_url,
                                                     save_directory,
                                                     jobs_manager)
                update_dns(logger, unzipped_dns, zones, dns_manager)
        except Exception as ex:
            logger.error("Unexpected error: " + str(ex))
            jobs_manager.record_job_error()
            exit(0)

        jobs_manager.record_job_complete()
    elif args.sonar_file_type == "dns-a":
        logger.info("Updating DNS A, AAAA, and CNAME records")

        jobs_manager = JobsManager.JobsManager(mongo_connector,
                                               "get_sonar_data_dns-a")
        jobs_manager.record_job_start()

        try:
            html_parser = r7.find_file_locations(s, "fdns", jobs_manager)
            if html_parser.a_url != "":
                logger.info("Updating A records")
                unzipped_dns = download_remote_files(logger, s,
                                                     html_parser.a_url,
                                                     save_directory,
                                                     jobs_manager)
                update_dns(logger, unzipped_dns, zones, dns_manager)
            if html_parser.aaaa_url != "":
                logger.info("Updating AAAA records")
                unzipped_dns = download_remote_files(logger, s,
                                                     html_parser.aaaa_url,
                                                     save_directory,
                                                     jobs_manager)
                update_dns(logger, unzipped_dns, zones, dns_manager)
            if html_parser.cname_url != "":
                logger.info("Updating CNAME records")
                unzipped_dns = download_remote_files(logger, s,
                                                     html_parser.cname_url,
                                                     save_directory,
                                                     jobs_manager)
                update_dns(logger, unzipped_dns, zones, dns_manager)
        except Exception as ex:
            logger.error("Unexpected error: " + str(ex))
            jobs_manager.record_job_error()
            exit(0)

        jobs_manager.record_job_complete()
    else:
        logger.error("Unrecognized sonar_file_type option. Exiting...")

    now = datetime.now()
    print("Complete: " + str(now))
    logger.info("Complete.")
Exemplo n.º 27
0
def main():
    """
    Begin Main...
    """
    logger = LoggingUtil.create_log(__name__)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    # Set up the common objects
    mongo_connector = MongoConnector.MongoConnector()
    ct_collection = mongo_connector.get_certificate_transparency_connection()
    zones = ZoneManager.get_distinct_zones(mongo_connector)
    jobs_manager = JobsManager.JobsManager(mongo_connector, "get_crt_sh")
    jobs_manager.record_job_start()

    save_location = "/mnt/workspace/crt_sh"
    download_method = 'dbAndSave'

    parser = argparse.ArgumentParser(
        description='Download DNS and/or certificate information from crt.sh.')
    parser.add_argument(
        '--fetch_dns_records',
        action='store_true',
        help='Indicates whether to add DNS entries to the database')
    parser.add_argument(
        '--download_methods',
        choices=['dbAndSave', 'dbOnly'],
        default=download_method,
        help=
        'Indicates whether to download the raw files or just record in the database.'
    )
    parser.add_argument(
        '--cert_save_location',
        required=False,
        default=save_location,
        help=
        'Indicates where to save the certificates on disk when choosing dbAndSave'
    )
    args = parser.parse_args()

    if args.cert_save_location:
        save_location = args.cert_save_location
        if not save_location.endswith("/"):
            save_location = save_location + "/"

    if args.download_methods == 'dbAndSave':
        check_save_location(save_location)

    for zone in zones:
        # Pace out requests so as not to DoS crt.sh and Google DNS
        time.sleep(5)

        # This could be done with backoff but we don't want to be overly aggressive.
        json_result = make_https_request(
            logger, "https://crt.sh/?q=%25." + zone + "&output=json")
        if json_result is None:
            logger.warning("Can't find result for: " + zone)
            json_result = "{}"

        json_data = json.loads(json_result)

        new_names = []
        new_ids = []
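        # Each crt.sh entry carries a certificate "id" and a "name_value"
        # hostname; collect the unique values, skipping wildcard names.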
        for entry in json_data:
            if entry['id'] not in new_ids:
                new_ids.append(entry['id'])

            if "*" not in entry["name_value"] and entry[
                    "name_value"] not in new_names:
                new_names.append(entry["name_value"])

        if args.fetch_dns_records:
            add_new_domain_names(new_names, zones, mongo_connector)

        if args.download_methods == "dbAndSave":
            add_new_certificate_values(logger, new_ids, ct_collection, zones,
                                       save_location)
        elif args.download_methods == "dbOnly":
            add_new_certificate_values(logger, new_ids, ct_collection, zones,
                                       None)

    # Set isExpired for any entries that have recently expired.
    ct_collection.update_many(
        {
            "not_after": {
                "$lt": datetime.utcnow()
            },
            "isExpired": False
        }, {"$set": {
            "isExpired": True
        }})

    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Ending: " + str(now))
    logger.info("Complete.")
Exemplo n.º 28
0
def main():
    """
    Begin main...
    """

    if is_running("get_censys_files.py"):
        """
        Check to see if a download is in process...
        """
        now = datetime.now()
        print(str(now) + ": Can't run due to get_files running. Goodbye!")
        exit(0)

    if is_running(os.path.basename(__file__)):
        """
        Check to see if a previous attempt to parse is still running...
        """
        now = datetime.now()
        print(str(now) + ": I am already running! Goodbye!")
        exit(0)

    # Make the relevant database connections
    RMC = RemoteMongoConnector.RemoteMongoConnector()

    # Verify that the get_files script has a recent file in need of parsing.
    jobs_collection = RMC.get_jobs_connection()

    status = jobs_collection.find_one({'job_name': 'censys'})
    if status['status'] != "DOWNLOADED":
        now = datetime.now()
        print(str(now) + ": The status is not set to DOWNLOADED. Goodbye!")
        exit(0)

    now = datetime.now()
    print("Starting: " + str(now))

    # Collect the list of available zones
    zones = ZoneManager.get_distinct_zones(RMC)

    print("Zones: " + str(len(zones)))

    # Collect the list of AWS CIDRs
    aws_ips = []
    get_aws_ips(RMC, aws_ips)

    print("AWS IPs: " + str(len(aws_ips)))

    # Collect the list of Azure CIDRs
    azure_ips = []
    get_azure_ips(RMC, azure_ips)

    print("Azure IPs: " + str(len(azure_ips)))

    # Collect the list of known CIDRs
    ip_zones_collection = RMC.get_ipzone_connection()

    results = ip_zones_collection.find({'status': {"$ne": "false_positive"}})
    cidrs = []
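    # Parse each CIDR into an IPNetwork object up front so the later
    # containment checks don't have to re-parse the string form.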
    for entry in results:
        cidrs.append(IPNetwork(entry['zone']))

    print("CIDRs: " + str(len(cidrs)))

    # Get the current configuration information for Marinus.
    config_collection = RMC.get_config_connection()

    configs = config_collection.find({})
    orgs = []
    for org in configs[0]['SSL_Orgs']:
        orgs.append(org)

    print("Orgs: " + str(len(orgs)))

    # Obtain the name of the decompressed file.
    filename_f = open(FILENAME_FILE, "r")
    decompressed_file = filename_f.readline()
    filename_f.close()

    # For manual testing: decompressed_file = "ipv4.json"

    now = datetime.now()
    print(str(now) + ": Beginning file processing...")

    # Remove old results from the database
    results_collection = RMC.get_results_connection()
    results_collection.delete_many({})
    all_dns_collection = RMC.get_all_dns_connection()

    try:
        with open(decompressed_file, "r") as dec_f:
            for line in dec_f:
                try:
                    entry = json.loads(line)
                    """
                    Does the SSL certificate match a known organization?
                    Is the IP address in a known CIDR?
                    """
                    if check_in_org(entry, orgs) or \
                       check_in_cidr(entry['ip'], cidrs):
                        entry['zones'] = check_in_zone(entry, zones)
                        entry['aws'] = is_aws_ip(entry['ip'], aws_ips)
                        entry['azure'] = is_azure_ip(entry['ip'], azure_ips)
                        (domains,
                         zones) = lookup_domain(entry, zones,
                                                all_dns_collection)
                        if len(domains) > 0:
                            entry['domains'] = domains
                            if len(zones) > 0:
                                for zone in zones:
                                    if zone not in entry['zones']:
                                        entry['zones'].append(zone)
                        insert_result(entry, results_collection)
                    # else:
                    #     #This will add days to the amount of time necessary to scan the file.
                    #     matched_zones = check_in_zone(entry, zones)
                    #     if matched_zones != []:
                    #         entry['zones'] = matched_zones
                    #         entry['aws'] = is_aws_ip(entry['ip'], aws_ips)
                    #         entry['azure'] = is_azure_ip(entry['ip'], azure_ips)
                    #         insert_result(entry, results_collection)
                except ValueError as err:
                    print("Value Error!")
                    print(str(err))
                except:
                    print("Line unexpected error:", sys.exc_info()[0])
                    print("Line unexpected error:", sys.exc_info()[1])
    except IOError as err:
        print("I/O error({0}): {1}".format(err.errno, err.strerror))
        exit(0)
    except:
        print("Unexpected error:", sys.exc_info()[0])
        print("Unexpected error:", sys.exc_info()[1])
        exit(0)

    # Indicate that the processing of the job is complete and ready for download to Marinus
    jobs_collection.update_one({'job_name': 'censys'}, {
        '$currentDate': {
            "updated": True
        },
        "$set": {
            'status': 'COMPLETE'
        }
    })

    now = datetime.now()
    print("Ending: " + str(now))
Exemplo n.º 29
0
def main():
    """
    Begin Main...
    """

    # The sources for which to remove expired entries.
    # Infoblox is handled separately.
    # Format: [{"name": source_name, "diff": date_difference_in_months}, ...]
    sources = [
        {"name": "sonar_dns", "diff": -2},
        {"name": "sonar_dns_saved", "diff": -2},
        {"name": "sonar_rdns", "diff": -2},
        {"name": "sonar_rdns_saved", "diff": -2},
        {"name": "ssl", "diff": -2},
        {"name": "ssl_saved", "diff": -2},
        {"name": "virustotal", "diff": -2},
        {"name": "virustotal_saved", "diff": -2},
        {"name": "UltraDNS", "diff": -2},
        {"name": "UltraDNS_saved", "diff": -2},
        {"name": "marinus", "diff": -2},
        {"name": "marinus_saved", "diff": -2},
        {"name": "mx", "diff": -2},
        {"name": "mx_saved", "diff": -2},
        {"name": "common_crawl", "diff": -4},
        {"name": "common_crawl_saved", "diff": -4},
    ]

    amass_diff = -2

    now = datetime.now()
    print("Starting: " + str(now))

    mongo_connector = MongoConnector.MongoConnector()
    all_dns_collection = mongo_connector.get_all_dns_connection()
    dns_manager = DNSManager.DNSManager(mongo_connector)
    GDNS = GoogleDNS.GoogleDNS()
    ip_manager = IPManager.IPManager(mongo_connector)

    jobs_manager = JobsManager.JobsManager(mongo_connector,
                                           'remove_expired_entries')
    jobs_manager.record_job_start()

    zones = ZoneManager.get_distinct_zones(mongo_connector)

    # Get the date for today minus two months
    d_minus_2m = monthdelta(datetime.now(), -2)
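    # monthdelta() is a helper defined elsewhere in this script that shifts
    # a datetime by the given number of calendar months.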

    print("Removing SRDNS as of: " + str(d_minus_2m))

    # Remove the old records
    srdns_collection = mongo_connector.get_sonar_reverse_dns_connection()
    srdns_collection.delete_many({'updated': {"$lt": d_minus_2m}})

    ip_manager.delete_records_by_date(d_minus_2m)

    # Before completely removing old entries, make an attempt to see if they are still valid.
    # Occasionally, a host name will still be valid but, for whatever reason, is no longer tracked by a source.
    # Rather than throw away valid information, this will archive it.
    for entry in sources:
        removal_date = monthdelta(datetime.now(), entry['diff'])
        source = entry['name']
        print("Removing " + source + " as of: " + str(removal_date))

        last_domain = ""
        results = all_dns_collection.find({
            'sources': {
                "$size": 1
            },
            'sources.source': source,
            'sources.updated': {
                "$lt": removal_date
            }
        })
        for result in results:
            if result['fqdn'] != last_domain:
                last_domain = result['fqdn']

                lookup_int = get_lookup_int(result, GDNS)
                dns_result = GDNS.fetch_DNS_records(result['fqdn'], lookup_int)

                if dns_result != []:
                    insert_current_results(dns_result, dns_manager, zones,
                                           result, source)

        dns_manager.remove_all_by_source_and_date(source, entry['diff'])

    # Process amass entries
    temp_sources = mongo_connector.perform_distinct(all_dns_collection,
                                                    'sources.source')
    amass_sources = []
    for entry in temp_sources:
        if entry.startswith("amass:"):
            amass_sources.append(entry)
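    # Amass results are stored under dynamic source names ("amass:<suffix>"),
    # which is why they are discovered here rather than hard-coded above.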

    for source in amass_sources:
        removal_date = monthdelta(datetime.now(), amass_diff)
        print("Removing " + source + " as of: " + str(removal_date))

        last_domain = ""
        results = mongo_connector.perform_find(
            all_dns_collection, {
                'sources': {
                    "$size": 1
                },
                'sources.source': source,
                'sources.updated': {
                    "$lt": removal_date
                }
            })
        for result in results:
            if result['fqdn'] != last_domain:
                last_domain = result['fqdn']

                lookup_int = get_lookup_int(result, GDNS)
                dns_result = GDNS.fetch_DNS_records(result['fqdn'], lookup_int)

                if dns_result != []:
                    insert_current_results(dns_result, dns_manager, zones,
                                           result, source)

        dns_manager.remove_all_by_source_and_date(source, amass_diff)

    # Record status
    jobs_manager.record_job_complete()

    now = datetime.now()
    print("Complete: " + str(now))
Exemplo n.º 30
0
def main():
    """
    Begin main...
    """
    logger = LoggingUtil.create_log(__name__)

    if is_running("get_censys_files.py"):
        """
        Check to see if a download is in process...
        """
        logger.warning("Can't run due to get_files running. Goodbye!")
        exit(0)

    if is_running(os.path.basename(__file__)):
        """
        Check to see if a previous attempt to parse is still running...
        """
        logger.warning("I am already running! Goodbye!")
        exit(0)

    # Make the relevant database connections
    RMC = RemoteMongoConnector.RemoteMongoConnector()

    ip_manager = IPManager.IPManager(RMC)

    # Verify that the get_files script has a recent file in need of parsing.
    jobs_collection = RMC.get_jobs_connection()

    status = jobs_collection.find_one({"job_name": "censys"})
    if status["status"] != "DOWNLOADED":
        logger.warning("The status is not set to DOWNLOADED. Goodbye!")
        exit(0)

    now = datetime.now()
    print("Starting: " + str(now))
    logger.info("Starting...")

    # Collect the list of available zones
    zones = ZoneManager.get_distinct_zones(RMC)

    logger.info("Zones: " + str(len(zones)))

    # Get the current configuration information for Marinus.
    config_collection = RMC.get_config_connection()

    configs = config_collection.find({})
    orgs = []
    for org in configs[0]["SSL_Orgs"]:
        orgs.append(org)

    logger.info("Orgs: " + str(len(orgs)))

    # Obtain the name of the decompressed file.
    filename_f = open(FILENAME_FILE, "r")
    decompressed_file = filename_f.readline()
    filename_f.close()

    # For manual testing: decompressed_file = "ipv4.json"

    logger.info("Beginning file processing...")

    # Remove old results from the database
    results_collection = RMC.get_results_connection()
    results_collection.delete_many({})
    all_dns_collection = RMC.get_all_dns_connection()

    try:
        with open(decompressed_file, "r") as dec_f:
            for line in dec_f:
                try:
                    entry = json.loads(line)
                    """
                    Does the SSL certificate match a known organization?
                    Is the IP address in a known CIDR?
                    Is the IP address recorded in Splunk?
                    """
                    if (check_in_org(entry, orgs)
                            or ip_manager.is_tracked_ip(entry["ip"])
                            or ip_manager.find_splunk_data(entry["ip"], "AWS")
                            is not None or ip_manager.find_splunk_data(
                                entry["ip"], "AZURE") is not None):
                        entry["zones"] = check_in_zone(entry, zones)
                        entry["aws"] = ip_manager.is_aws_ip(entry["ip"])
                        entry["azure"] = ip_manager.is_azure_ip(entry["ip"])
                        (domains,
                         zones) = lookup_domain(entry, zones,
                                                all_dns_collection)
                        if len(domains) > 0:
                            entry["domains"] = domains
                            if len(zones) > 0:
                                for zone in zones:
                                    if zone not in entry["zones"]:
                                        entry["zones"].append(zone)
                        insert_result(entry, results_collection)
                    # else:
                    #     #This will add days to the amount of time necessary to scan the file.
                    #     matched_zones = check_in_zone(entry, zones)
                    #     if matched_zones != []:
                    #         entry['zones'] = matched_zones
                    #         entry['aws'] = ip_manager.is_aws_ip(entry['ip'])
                    #         entry['azure'] = ip_manager.is_azure_ip(entry['ip'])
                    #         insert_result(entry, results_collection)
                except ValueError as err:
                    logger.error("Value Error!")
                    logger.error(str(err))
                except:
                    logger.error("Line unexpected error: " +
                                 str(sys.exc_info()[0]))
                    logger.error("Line unexpected error: " +
                                 str(sys.exc_info()[1]))
    except IOError as err:
        logger.error("I/O error({0}): {1}".format(err.errno, err.strerror))
        exit(1)
    except:
        logger.error("Unexpected error: " + str(sys.exc_info()[0]))
        logger.error("Unexpected error: " + str(sys.exc_info()[1]))
        exit(1)

    # Indicate that the processing of the job is complete and ready for download to Marinus
    jobs_collection.update_one(
        {"job_name": "censys"},
        {
            "$currentDate": {
                "updated": True
            },
            "$set": {
                "status": "COMPLETE"
            }
        },
    )

    now = datetime.now()
    print("Ending: " + str(now))
    logger.info("Complete.")