def download_file(logger, url, out_file):
    result = False

    # Open the output file in the temporary directory
    try:
        out_fd = open('{}/{}'.format(sc.get_config_item('tmp_dir'),
                                     out_file), 'wb')
    except Exception as e:
        logger.log_err(
            'Failed to open output file {} in temporary directory ({})'.format(
                out_file, e))
        return False

    # Stream the download to disk in 1 MiB chunks
    try:
        response = requests.get(url, stream=True)

        if response.status_code == 200:
            if 'Content-Length' in response.headers:
                logger.log_info('Downloading {} bytes from {}'.format(
                    response.headers['Content-Length'], url))
            else:
                logger.log_info('Downloading {}'.format(url))

            downloaded_bytes = 0

            for chunk in response.iter_content(1024 * 1024):
                out_fd.write(chunk)
                out_fd.flush()
                downloaded_bytes += len(chunk)

            logger.log_info('Downloaded {} bytes from {}'.format(
                downloaded_bytes, url))

            result = True
        else:
            logger.log_err('GET {} returned {}'.format(url,
                                                       response.status_code))
    except Exception as e:
        logger.log_err('Failed to download from {} ({})'.format(url, e))

    out_fd.close()

    # Do not leave a partial download behind
    if not result:
        cleanup_tmp_file(out_file)

    return result
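# Illustrative usage (hypothetical URL and file name, not from the original
# script): download_file(logger, 'https://example.org/data.tar', 'data.tar')
# streams the file into the configured tmp_dir and returns True on success;
# on failure the partial file is removed via cleanup_tmp_file().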
def process_avro_files(logger, day, avro_dir, proc_count, out_dir, tld,
                       tlsa_one_set, tlsa_all_set):
    # Collect the Avro files to analyse
    avro_list = []

    for f in os.listdir(avro_dir):
        if f.lower().endswith('.avro'):
            avro_list.append(f)

    logger.log_info('Found {} Avro files in {}'.format(len(avro_list),
                                                       avro_dir))
    logger.log_info('Writing results to {}'.format(out_dir))

    oilog.mkdir_p(out_dir)

    # Distribute the Avro files over the worker processes
    analysis_queue = mp.Queue()

    for a in avro_list:
        analysis_queue.put(a)

    analysis_procs = set()

    for t in range(0, proc_count):
        analysis_procs.add(
            mp.Process(target=avro_check_proc,
                       args=(logger, analysis_queue, avro_dir, out_dir, tld,
                             tlsa_one_set, tlsa_all_set)))

    logger.log_info('Starting analysis processes')

    for t in analysis_procs:
        t.start()

    # Wait for all workers to finish
    while len(analysis_procs) > 0:
        for t in analysis_procs:
            t.join(0.25)

            if not t.is_alive():
                analysis_procs.remove(t)
                break

    logger.log_info('Merging individual results')

    tot_count = 0

    result_name = '{}/{}-results-{}.json.bz2'.format(out_dir, tld, day)

    result_fd = bz2.open(result_name, 'wt')
    result_fd.write('[\n')

    stats_dict = dict()

    for a in avro_list:
        # Merge the per-file JSON results into a single compressed file
        json_name = a.replace('.avro', '.json')

        logger.log_info('Merging in {}/{}'.format(out_dir, json_name))

        json_fd = open('{}/{}'.format(out_dir, json_name), 'r')

        count = 0

        for line in json_fd:
            line = line.strip('\r').strip('\n')
            result_fd.write('{},\n'.format(line))
            count += 1

        json_fd.close()

        logger.log_info('Merged {} results from {}/{}'.format(
            count, out_dir, json_name))

        tot_count += count

        os.unlink('{}/{}'.format(out_dir, json_name))

        # Aggregate the per-file statistics
        stats_name = a.replace('.avro', '-stats.json')

        logger.log_info('Collecting stats from {}/{}'.format(
            out_dir, stats_name))

        stats_fd = open('{}/{}'.format(out_dir, stats_name), 'r')

        for line in stats_fd:
            line = line.strip('\r').strip('\n')

            avro_stats = json.loads(line)

            for key in avro_stats:
                stats_dict[key] = stats_dict.get(key, 0) + avro_stats[key]

        stats_fd.close()

        os.unlink('{}/{}'.format(out_dir, stats_name))

    # Every merged result line ends in a comma, so write an empty object as
    # the final element to keep the JSON array valid
    result_fd.write('{}\n')
    result_fd.write(']\n')
    result_fd.close()

    logger.log_info('Done, wrote {} results to {}'.format(
        tot_count, result_name))

    stats_name = '{}/{}-stats-{}.json'.format(out_dir, tld, day)

    stats_out = open(stats_name, 'w')
    stats_out.write('{}\n'.format(json.dumps(stats_dict)))
    stats_out.close()

    logger.log_info('Wrote statistics to {}'.format(stats_name))

    # Consolidate the per-file Avro logs into a single compressed log
    consolidated_avro_log = '{}/{}-avrologs-{}.log.bz2'.format(
        sc.get_config_item('log_dir'), tld, day)

    cl_fd = bz2.open(consolidated_avro_log, 'wt')

    for a in avro_list:
        log_name = '{}/{}'.format(sc.get_config_item('log_dir'),
                                  a.replace('.avro', '.log'))

        log_fd = open(log_name, 'r')

        for line in log_fd:
            cl_fd.write(line)

        log_fd.close()

        logger.log_info('Added {} to consolidated Avro log'.format(log_name))

        os.unlink(log_name)

    cl_fd.close()

    logger.log_info(
        'Consolidated Avro logs to {}'.format(consolidated_avro_log))
def main():
    argparser = argparse.ArgumentParser(
        description='Perform DNSSEC checks against Avro files in a directory')

    argparser.add_argument('-c', '--config',
                           nargs=1,
                           help='configuration file to use',
                           type=str,
                           metavar='config_file',
                           dest='config_file',
                           required=True)

    argparser.add_argument('-d', '--date',
                           nargs=1,
                           help='date to process (defaults to yesterday)',
                           type=str,
                           metavar='process_date',
                           dest='process_date',
                           required=False)

    args = argparser.parse_args()

    # Load configuration
    try:
        sc.load_config(args.config_file[0])
    except Exception as e:
        print(e)
        sys.exit(1)

    oilog.set_log_dir(sc.get_config_item('log_dir'))

    # Default to processing yesterday's data
    day = datetime.date.today() - datetime.timedelta(days=1)

    if args.process_date is not None:
        day = dateutil.parser.parse(args.process_date[0]).date()

    logger = oilog.OILog()
    logger.open('oi-dnssecchecks-{}-{}.log'.format(day,
                                                   sc.get_config_item('tld')))

    # Download required data
    if not download_data(logger, day):
        logger.log_err(
            'Failed to download data for {}, bailing out'.format(day))
        sys.exit(1)

    # Load TLSA sets; the "one" set lists domains with a TLSA record for at
    # least one MX, the "all" set those with TLSA records for all MXes
    tlsa_one_set = load_tlsa_list(
        '{}/tlsa-one-{}-{}.txt'.format(sc.get_config_item('tmp_dir'),
                                       sc.get_config_item('tld'), day), logger)
    tlsa_all_set = load_tlsa_list(
        '{}/tlsa-all-{}-{}.txt'.format(sc.get_config_item('tmp_dir'),
                                       sc.get_config_item('tld'), day), logger)

    cleanup_tmp_file('tlsa-all-{}-{}.txt'.format(sc.get_config_item('tld'),
                                                 day))
    cleanup_tmp_file('tlsa-one-{}-{}.txt'.format(sc.get_config_item('tld'),
                                                 day))

    try:
        process_avro_files(logger, day, sc.get_config_item('tmp_dir'),
                           sc.get_config_item('multi_process_count', 1),
                           sc.get_config_item('out_dir'),
                           sc.get_config_item('tld'), tlsa_one_set,
                           tlsa_all_set)
    except Exception as e:
        logger.log_err('Process terminated with an exception')
        logger.log_err(e)

    logger.close()
def download_data(logger, day):
    tar_url = 'https://data.openintel.nl/data/open-tld/{}/openintel-open-tld-{:04d}{:02d}{:02d}.tar'.format(
        day.year, day.year, day.month, day.day)
    tlsa_all_url = 'https://data.openintel.nl/data/open-tld/{}/tlsa/{}-tlsa-all-mx-{}.txt'.format(
        day.year, sc.get_config_item('tld'), day)
    tlsa_one_url = 'https://data.openintel.nl/data/open-tld/{}/tlsa/{}-tlsa-one-mx-{}.txt'.format(
        day.year, sc.get_config_item('tld'), day)

    logger.log_info('Fetching Avro data from {}'.format(tar_url))

    if not download_file(logger, tar_url, 'opentld-{}.tar'.format(day)):
        return False

    logger.log_info(
        'Fetching domains with TLSA records for all MX records from {}'.format(
            tlsa_all_url))

    if not download_file(logger, tlsa_all_url,
                         'tlsa-all-{}-{}.txt'.format(
                             sc.get_config_item('tld'), day)):
        cleanup_tmp_file('opentld-{}.tar'.format(day))
        return False

    logger.log_info(
        'Fetching domains with TLSA records for at least one MX record from {}'
        .format(tlsa_one_url))

    if not download_file(logger, tlsa_one_url,
                         'tlsa-one-{}-{}.txt'.format(
                             sc.get_config_item('tld'), day)):
        cleanup_tmp_file('opentld-{}.tar'.format(day))
        cleanup_tmp_file('tlsa-all-{}-{}.txt'.format(sc.get_config_item('tld'),
                                                     day))
        return False

    try:
        untar = tarfile.open('{}/opentld-{}.tar'.format(
            sc.get_config_item('tmp_dir'), day))
        untar.extractall(sc.get_config_item('tmp_dir'))
        untar.close()
    except Exception as e:
        logger.log_err('Failed to unpack {}/{} ({})'.format(
            sc.get_config_item('tmp_dir'), 'opentld-{}.tar'.format(day), e))

        cleanup_tmp_file('opentld-{}.tar'.format(day))
        cleanup_tmp_file('tlsa-all-{}-{}.txt'.format(sc.get_config_item('tld'),
                                                     day))
        cleanup_tmp_file('tlsa-one-{}-{}.txt'.format(sc.get_config_item('tld'),
                                                     day))

        for t in glob.glob('{}/*.avro'.format(sc.get_config_item('tmp_dir'))):
            logger.log_info('Cleaning up {}'.format(t))
            os.unlink(t)

        return False

    cleanup_tmp_file('opentld-{}.tar'.format(day))

    return True
def cleanup_tmp_file(tmp_name):
    try:
        os.unlink('{}/{}'.format(sc.get_config_item('tmp_dir'), tmp_name))
    except:
        pass
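# Illustrative entry point (not part of the original excerpt): a minimal
# sketch assuming main() above is the intended entry point of this script.
if __name__ == '__main__':
    main()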
def process_date(day):
    print('Ziggy is processing data for {}'.format(day))

    # Step 1: download tar archives for the specified date
    day_tarfiles = []

    base_url = "https://ftp.ripe.net/rpki"

    tals = [
        "afrinic.tal", "apnic-afrinic.tal", "apnic-arin.tal", "apnic-iana.tal",
        "apnic-lacnic.tal", "apnic-ripe.tal", "apnic.tal", "arin.tal",
        "lacnic.tal", "ripencc.tal"
    ]

    tmp_dir = sc.get_path_item('tmp-dir')

    for tal in tals:
        tal_file = '{}/{}.tar.gz'.format(tmp_dir, tal)
        tal_url = '{}/{}/{:04d}/{:02d}/{:02d}/repo.tar.gz'.format(
            base_url, tal, day.year, day.month, day.day)

        if download_from_url(tal_url, tal_file):
            day_tarfiles.append(tal_file)

    # Step 2: clean out the Routinator cache and TAL directory
    routinator_cache = sc.get_path_item('routinator-cache')
    routinator_tals = sc.get_path_item('routinator-tals')

    try:
        sys.stdout.write('Cleaning out {} ... '.format(routinator_cache))
        sys.stdout.flush()
        shutil.rmtree(routinator_cache)
        os.mkdir(routinator_cache)
        print('OK')

        sys.stdout.write('Cleaning out {} ... '.format(routinator_tals))
        sys.stdout.flush()
        shutil.rmtree(routinator_tals)
        os.mkdir(routinator_tals)
        print('OK')
    except Exception as e:
        print('FAILED')
        raise e

    # Step 3: extract the unvalidated data from the tar archives
    ignore_tals = sc.get_config_item('ignore-tals')

    latest_time = datetime.datetime.fromtimestamp(0)

    for tarchive in day_tarfiles:
        sys.stdout.write('Ziggy is processing {} ... '.format(tarchive))
        sys.stdout.flush()

        obj_count = 0

        try:
            t = tarfile.open('{}'.format(tarchive))

            basepath = None
            wrote_ta = False

            for member in t:
                if '/unvalidated/' in member.name and member.isfile():
                    pathcomp = member.name.split('/')

                    i = 0

                    while pathcomp[i] != 'unvalidated':
                        i += 1

                    i += 1

                    write_path = '{}/{}'.format(routinator_cache,
                                                '/'.join(pathcomp[i:-1]))

                    if basepath is None:
                        basepath = pathcomp[i]

                    try:
                        os.makedirs(write_path)
                    except:
                        pass

                    out_fd = open('{}/{}'.format(write_path, pathcomp[-1]),
                                  'wb')
                    in_fd = t.extractfile(member)

                    buf = in_fd.read(1024)

                    while len(buf) > 0:
                        out_fd.write(buf)
                        buf = in_fd.read(1024)

                    out_fd.close()
                    in_fd.close()

                    obj_count += 1

                    # Track the most recent object modification time; the
                    # Routinator is later run at this point in time
                    if datetime.datetime.fromtimestamp(
                            member.mtime) > latest_time:
                        latest_time = datetime.datetime.fromtimestamp(
                            member.mtime)
                elif member.name.endswith('.tal.cer') and member.isfile():
                    if wrote_ta:
                        raise Exception(
                            "Already wrote a TA for {}, wasn't expecting another one."
                            .format(tarchive))

                    out_fd = open('{}/tmp-ta.cer'.format(routinator_cache),
                                  'wb')
                    in_fd = t.extractfile(member)

                    buf = in_fd.read(1024)

                    while len(buf) > 0:
                        out_fd.write(buf)
                        buf = in_fd.read(1024)

                    out_fd.close()
                    in_fd.close()

                    wrote_ta = True

            print('OK ({} objects)'.format(obj_count))

            if not wrote_ta:
                print('Warning, found no TA in {}'.format(tarchive))
            else:
                if basepath in ignore_tals:
                    print('Ignoring TAL for {}'.format(basepath))
                    os.unlink('{}/tmp-ta.cer'.format(routinator_cache))
                else:
                    # For some older archives, the TA certificate is
                    # sometimes encoded in PEM format. Convert it to
                    # DER if necessary
                    ta_fd = open('{}/tmp-ta.cer'.format(routinator_cache),
                                 'rb')

                    is_pem = False

                    pem_header = bytes('-----BEGIN CERTIFICATE-----', 'utf8')

                    for line in ta_fd:
                        if len(line) >= len(pem_header) and line[:len(
                                pem_header)] == pem_header:
                            is_pem = True
                            break

                    ta_fd.close()

                    if is_pem:
                        print(
                            'Found an old TA certificate in PEM format, converting to DER'
                        )

                        osslcmd = 'openssl x509 -inform PEM -in {}/tmp-ta.cer -outform DER -out {}/tmp-ta-der.cer'.format(
                            routinator_cache, routinator_cache)

                        if os.system(osslcmd) != 0:
                            raise Exception(
                                'Failed to convert TA from PEM to DER')

                        os.unlink('{}/tmp-ta.cer'.format(routinator_cache))
                        os.rename(
                            '{}/tmp-ta-der.cer'.format(routinator_cache),
                            '{}/tmp-ta.cer'.format(routinator_cache))

                    # Move the TA in place
                    ta_name = 'ta.cer'
                    tal_name = "{}.tal".format(basepath)

                    # From Oct 2012 - Apr 2018, APNIC had a different repo
                    # structure that we need to account for when recreating
                    # the TALs
                    if 'apnic' in tarchive:
                        fields = tarchive.split('.')

                        for field in fields:
                            if 'apnic' in field:
                                path_elems = field.split('/')
                                ta_name = 'ta-{}.cer'.format(path_elems[-1])
                                tal_name = '{}-{}.tal'.format(
                                    basepath, path_elems[-1])

                    ta_path = '{}/{}/ta'.format(routinator_cache, basepath)

                    sys.stdout.write('Moving TA to {}/{} ...'.format(
                        ta_path, ta_name))
                    sys.stdout.flush()

                    try:
                        os.makedirs(ta_path)
                    except:
                        pass

                    os.rename('{}/tmp-ta.cer'.format(routinator_cache),
                              '{}/{}'.format(ta_path, ta_name))

                    print('OK')

                    sys.stdout.write('Creating a TAL for this TA ... ')
                    sys.stdout.flush()

                    tal = open('{}/{}'.format(routinator_tals, tal_name), 'w')
                    tal.write('rsync://{}/ta/{}\n\n'.format(basepath, ta_name))
                    tal.close()

                    osslcmd = "openssl x509 -inform DER -in {}/{} -pubkey -noout | awk '!/-----(BEGIN|END)/' >> {}/{}".format(
                        ta_path, ta_name, routinator_tals, tal_name)

                    if os.system(osslcmd) != 0:
                        print('FAILED')
                        raise Exception('Failed to create a TAL')

                    print('OK')
        except Exception as e:
            print('Failed to process {}'.format(tarchive))
            raise e

    # Step 4: invoke the Routinator
    print('Ziggy thinks the Routinator should travel back to: {}'.format(
        latest_time))

    vrp_path = sc.get_path_item('vrp-out-name')
    log_path = sc.get_path_item('routinator-log-name')
    vrp_form = sc.get_config_item('vrp-out-format', 'csv')

    routinator_cmd = "faketime '{}' {} -vv --logfile {} vrps -n -o {} -f {}".format(
        latest_time, sc.get_config_item('routinator', 'routinator'),
        log_path.format(day), vrp_path.format(day), vrp_form)

    print('Invoking the Routinator as:')
    print(routinator_cmd)

    if os.system(routinator_cmd) != 0:
        print('Routinator exited with an error')
    else:
        print('Routinator indicated success!')

    # Step 5: clean up
    for tf in day_tarfiles:
        os.unlink(tf)
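# Illustrative driver (not part of the original excerpt): a minimal sketch of
# how process_date() might be invoked for yesterday's data, assuming the
# configuration has already been loaded elsewhere (e.g. via sc.load_config).
if __name__ == '__main__':
    yesterday = datetime.date.today() - datetime.timedelta(days=1)
    process_date(yesterday)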