def download_data_for_current_date():
    """
    Download every file required for parsing and return the data directory.

    Sources:
      * R01 dumps of the local zones:
          https://partner.r01.ru/zones/ru_domains.gz
          https://partner.r01.ru/zones/su_domains.gz
          https://partner.r01.ru/zones/rf_domains.gz
      * BGP full view from http://archive.routeviews.org; Pavel describes
        the approach in detail in his blog:
          http://phpsuxx.blogspot.com/2011/12/full-bgp.html
          http://phpsuxx.blogspot.com/2011/12/libbgpdump-debian-6-squeeze.html
      * Other zones can be looked up at http://csa.ee/databases-zone-files/

    The RIB dump URL is built from yesterday's date.

    :return: directory returned by ``Downloader.create_data_dir()``
    :rtype: unicode
    :raises Exception: when a download reports failure, or the target is
        missing or empty afterwards
    """
    dump_date = datetime.date.today() - datetime.timedelta(days=1)

    files_list = [
        {'url': 'https://partner.r01.ru/zones/ru_domains.gz',
         'file_name': 'ru_domains.gz'},
        {'url': 'https://partner.r01.ru/zones/su_domains.gz',
         'file_name': 'su_domains.gz'},
        {'url': 'https://partner.r01.ru/zones/rf_domains.gz',
         'file_name': 'rf_domains.gz'},
        {'url': 'http://archive.routeviews.org/bgpdata/%s/RIBS/rib.%s.0600.bz2'
                % (dump_date.strftime("%Y.%m"), dump_date.strftime("%Y%m%d")),
         'file_name': 'rib.bz2'},
    ]

    path = Downloader.create_data_dir()

    for item in files_list:
        path_file = os.path.abspath(os.path.join(path, item['file_name']))
        BColor.process("Download %s to %s " % (item['url'], path_file))

        # Drop leftovers of a previous run. shutil.rmtree() only removes
        # directories (and ignore_errors hides the failure on a file), so a
        # stale regular file has to be removed with os.remove().
        if os.path.isdir(path_file) and not os.path.islink(path_file):
            shutil.rmtree(path_file, ignore_errors=True)
        elif os.path.lexists(path_file):
            try:
                os.remove(path_file)
            except OSError:
                # Best effort, same as ignore_errors=True above; a file that
                # cannot be replaced is caught by the checks below.
                pass

        downloaded = Downloader.download_file(item['url'], path_file)

        # "is False" so that an implementation returning None is not treated
        # as a failure; a missing target is reported with the same error as an
        # empty one instead of a bare OSError from getsize().
        if (downloaded is False
                or not os.path.exists(path_file)
                or os.path.getsize(path_file) == 0):
            message = "Can`t download file %s to %s" % (item['url'], path_file)
            BColor.error(message)
            raise Exception(message)

    return path
def delete_not_updated_today(count_all_domain=False):
    """
    Delete domains that were not loaded today and reset the ``load_today`` flag.

    With a falsy ``count_all_domain`` every row with ``load_today = 'N'`` is
    deleted and the flag is reset for the whole table.

    With a mapping ``{tld: count_in_file}`` each TLD is handled separately and
    is only cleaned when the table holds at least as many rows for that TLD
    as the zone file did; otherwise the TLD is left untouched and an error is
    logged.

    The flag reset runs between ``SET @TRIGGER_DISABLED = 1`` and
    ``SET @TRIGGER_DISABLED = 0`` (presumably checked by the table triggers --
    confirm against the schema).

    :type count_all_domain: bool|dict
    :return: None
    """
    sql_trigger_enable = "SET @TRIGGER_DISABLED = 0"
    sql_trigger_disable = "SET @TRIGGER_DISABLED = 1"

    connection = get_mysql_connection()
    try:
        cursor = connection.cursor(MySQLdb.cursors.DictCursor)

        def run_logged(sql, params=None):
            # Log the statement with the value inlined (as it was logged
            # before) but let the driver quote the actual parameter.
            if params is None:
                BColor.process(sql)
                cursor.execute(sql)
            else:
                BColor.process(sql % tuple("'%s'" % value for value in params))
                cursor.execute(sql, params)

        def purge(tld=None):
            # Delete stale rows, then reset the flag with triggers disabled.
            delete_sql = "DELETE FROM domain WHERE load_today = 'N'"
            update_sql = "UPDATE domain SET load_today = 'N'"
            params = None
            if tld is not None:
                delete_sql += " AND tld = %s"
                update_sql += " WHERE tld = %s"
                params = (tld,)

            run_logged(delete_sql, params)
            cursor.execute(sql_trigger_disable)
            run_logged(update_sql, params)
            cursor.execute(sql_trigger_enable)

        if not count_all_domain:
            purge()
        else:
            # .items() works on both Python 2 and Python 3 dictionaries.
            for key_tld, tld_count_in_file in count_all_domain.items():
                tld = str(key_tld)
                cursor.execute(
                    "SELECT count(*) as domain_count FROM domain WHERE tld = %s",
                    (tld,))
                count_in_base = cursor.fetchone()
                domain_count = count_in_base['domain_count'] if count_in_base else None

                BColor.process("Count zone (%s) in file %s, in base %s"
                               % (tld, str(tld_count_in_file), str(domain_count)))

                if domain_count is not None and int(domain_count) >= int(tld_count_in_file):
                    purge(tld)
                else:
                    # Arguments follow the order of the placeholders:
                    # file count first, database count second.
                    BColor.error("TLD %s - count in file %s, count in base %s"
                                 % (tld, str(tld_count_in_file), str(domain_count)))

        connection.commit()
    finally:
        # Release the connection even when one of the statements failed.
        connection.close()
def unzip_file(path_file: str) -> bool:
    """
    Run the command produced by ``Gunzip(path_file)`` and report its outcome.

    :type path_file: unicode
    :return: True when the process exited with return code 0, False otherwise
        (the return code is logged through ``BColor.error``)
    """
    runner = SubprocessRunner(command=Gunzip(path_file).get_command())
    runner.run()
    runner.wait(write_output_in_log=False)

    exit_code = runner.process.returncode
    if exit_code == 0:
        return True

    BColor.error("unzip p.process.returncode = %s" % exit_code)
    return False
def download(path: str, item: dict):
    """
    Download ``item['url']`` to ``item['file_name']`` inside ``path``.

    :param path: directory the file is stored in
    :param item: mapping with the keys ``url`` and ``file_name``
    :return: size in bytes of the downloaded file
    :raises Exception: when the download reports failure, or the target is
        missing or empty afterwards
    """
    file_name = item['file_name']
    url = item['url']
    path_file = os.path.abspath(os.path.join(path, file_name))

    BColor.process("Download %s to %s " % (url, path_file))

    # Drop leftovers of a previous run. shutil.rmtree() only removes
    # directories (and ignore_errors hides the failure on a file), so a stale
    # regular file has to be removed with os.remove().
    if os.path.isdir(path_file) and not os.path.islink(path_file):
        shutil.rmtree(path_file, ignore_errors=True)
    elif os.path.lexists(path_file):
        try:
            os.remove(path_file)
        except OSError:
            # Best effort, same as ignore_errors=True above; a file that
            # cannot be replaced is caught by the checks below.
            pass

    downloaded = Downloader.download_file(url, path_file)

    # "is False" so that an implementation returning None is not treated as a
    # failure; a missing target is reported with the same error as an empty
    # one instead of a bare OSError from getsize().
    size = os.path.getsize(path_file) if os.path.exists(path_file) else 0
    if downloaded is False or size == 0:
        message = "Can`t download file %s to %s" % (url, path_file)
        BColor.error(message)
        raise Exception(message)

    return size
def unzip_file(path_file):
    """
    Run the command produced by ``Gunzip(path_file)`` and report its outcome.

    :type path_file: unicode
    :return: True when the process exited with return code 0, False otherwise
        (the return code is logged through ``BColor.error``)
    """
    runner = SubprocessRunner(command=Gunzip(path_file).get_command())
    runner.run()
    runner.wait(write_output_in_log=False)

    exit_code = runner.process.returncode
    if exit_code == 0:
        return True

    BColor.error("unzip p.process.returncode = %s" % exit_code)
    return False
def download_file(url, data_dir):
    """
    Download a file by running the command produced by ``Wget(url, data_dir)``.

    :type url: unicode
    :type data_dir: unicode
    :rtype: bool
    :return: True when the process exited with return code 0, False otherwise
        (the return code is logged through ``BColor.error``)
    """
    runner = SubprocessRunner(command=Wget(url, data_dir).get_command())
    runner.run()
    runner.wait(write_output_in_log=False)

    exit_code = runner.process.returncode
    if exit_code == 0:
        return True

    BColor.error("wget p.process.returncode = %s" % exit_code)
    return False
def delete_not_updated_today(count_all_domain=False):
    """
    Delete domains that were not loaded today and reset the ``load_today`` flag.

    With a falsy ``count_all_domain`` every row with ``load_today = 'N'`` is
    deleted and the flag is reset for the whole table.

    With a mapping ``{tld: count_in_file}`` each TLD is handled separately and
    is only cleaned when the table holds at least as many rows for that TLD
    as the zone file did; otherwise the TLD is left untouched and an error is
    logged.

    The flag reset runs between ``SET @TRIGGER_DISABLED = 1`` and
    ``SET @TRIGGER_DISABLED = 0`` (presumably checked by the table triggers --
    confirm against the schema).

    :type count_all_domain: bool|dict
    :return: None
    """
    sql_trigger_enable = "SET @TRIGGER_DISABLED = 0"
    sql_trigger_disable = "SET @TRIGGER_DISABLED = 1"

    connection = get_mysql_connection()
    try:
        cursor = connection.cursor(MySQLdb.cursors.DictCursor)

        def run_logged(sql, params=None):
            # Log the statement with the value inlined (as it was logged
            # before) but let the driver quote the actual parameter.
            if params is None:
                BColor.process(sql)
                cursor.execute(sql)
            else:
                BColor.process(sql % tuple("'%s'" % value for value in params))
                cursor.execute(sql, params)

        def purge(tld=None):
            # Delete stale rows, then reset the flag with triggers disabled.
            delete_sql = "DELETE FROM domain WHERE load_today = 'N'"
            update_sql = "UPDATE domain SET load_today = 'N'"
            params = None
            if tld is not None:
                delete_sql += " AND tld = %s"
                update_sql += " WHERE tld = %s"
                params = (tld,)

            run_logged(delete_sql, params)
            cursor.execute(sql_trigger_disable)
            run_logged(update_sql, params)
            cursor.execute(sql_trigger_enable)

        if not count_all_domain:
            purge()
        else:
            # .items() works on both Python 2 and Python 3 dictionaries.
            for key_tld, tld_count_in_file in count_all_domain.items():
                tld = str(key_tld)
                cursor.execute(
                    "SELECT count(*) as domain_count FROM domain WHERE tld = %s",
                    (tld,))
                count_in_base = cursor.fetchone()
                domain_count = count_in_base['domain_count'] if count_in_base else None

                BColor.process("Count zone (%s) in file %s, in base %s"
                               % (tld, str(tld_count_in_file), str(domain_count)))

                if domain_count is not None and int(domain_count) >= int(tld_count_in_file):
                    purge(tld)
                else:
                    # Arguments follow the order of the placeholders:
                    # file count first, database count second.
                    BColor.error("TLD %s - count in file %s, count in base %s"
                                 % (tld, str(tld_count_in_file), str(domain_count)))

        connection.commit()
    finally:
        # Release the connection even when one of the statements failed.
        connection.close()
def download_data_for_current_date():
    """
    Download every file required for parsing and return the data directory.

    Sources:
      * R01 dumps of the local zones:
          https://partner.r01.ru/zones/ru_domains.gz
          https://partner.r01.ru/zones/su_domains.gz
          https://partner.r01.ru/zones/rf_domains.gz
      * BGP full view from http://archive.routeviews.org; Pavel describes
        the approach in detail in his blog:
          http://phpsuxx.blogspot.com/2011/12/full-bgp.html
          http://phpsuxx.blogspot.com/2011/12/libbgpdump-debian-6-squeeze.html
      * Other zones can be looked up at http://csa.ee/databases-zone-files/

    The RIB dump URL is built from yesterday's date.

    :return: directory returned by ``Downloader.create_data_dir()``
    :rtype: unicode
    :raises Exception: when a download reports failure, or the target is
        missing or empty afterwards
    """
    dump_date = datetime.date.today() - datetime.timedelta(days=1)

    files_list = [
        {'url': 'https://partner.r01.ru/zones/ru_domains.gz',
         'file_name': 'ru_domains.gz'},
        {'url': 'https://partner.r01.ru/zones/su_domains.gz',
         'file_name': 'su_domains.gz'},
        {'url': 'https://partner.r01.ru/zones/rf_domains.gz',
         'file_name': 'rf_domains.gz'},
        {'url': 'http://archive.routeviews.org/bgpdata/%s/RIBS/rib.%s.0600.bz2'
                % (dump_date.strftime("%Y.%m"), dump_date.strftime("%Y%m%d")),
         'file_name': 'rib.bz2'},
    ]

    path = Downloader.create_data_dir()

    for item in files_list:
        path_file = os.path.abspath(os.path.join(path, item['file_name']))
        BColor.process("Download %s to %s " % (item['url'], path_file))

        # Drop leftovers of a previous run. shutil.rmtree() only removes
        # directories (and ignore_errors hides the failure on a file), so a
        # stale regular file has to be removed with os.remove().
        if os.path.isdir(path_file) and not os.path.islink(path_file):
            shutil.rmtree(path_file, ignore_errors=True)
        elif os.path.lexists(path_file):
            try:
                os.remove(path_file)
            except OSError:
                # Best effort, same as ignore_errors=True above; a file that
                # cannot be replaced is caught by the checks below.
                pass

        downloaded = Downloader.download_file(item['url'], path_file)

        # "is False" so that an implementation returning None is not treated
        # as a failure; a missing target is reported with the same error as an
        # empty one instead of a bare OSError from getsize().
        if (downloaded is False
                or not os.path.exists(path_file)
                or os.path.getsize(path_file) == 0):
            message = "Can`t download file %s to %s" % (item['url'], path_file)
            BColor.error(message)
            raise Exception(message)

    return path
def run(self):
    """
    Request DNS data for every entry of ``self.domains`` and store it in MySQL.

    Each entry provides a tab separated ``line`` (domain, registrant,
    register date, register end date, free date, delegated flag) and a
    ``prefix``.  Delegated domains (flag ``'1'``) are resolved through
    ``self._get_ns_record`` / ``self._get_asn_array``; the others are stored
    with empty DNS data.  The row is inserted or updated depending on whether
    the domain already exists.  A failing domain is logged and skipped.

    :return: 0 when the loop ran to completion, 1 when the worker failed
    """
    try:
        self.write_to_file(
            BColor.process("Process %s running, need work %s domains"
                           % (self.number, len(self.domains))))
        added_domains = 0
        # Matches every run of whitespace, so re.sub() strips all whitespace
        # from a field.
        re_prefix = re.compile(r'\s*')

        self._connect_mysql()
        cursor = self.connection.cursor(MySQLdb.cursors.DictCursor)

        for domain_data in self.domains:
            try:
                data = domain_data['line'].split("\t")

                domain = re.sub(re_prefix, '', data[0])
                delegated = re.sub(re_prefix, '', data[5])

                if delegated == '1':
                    delegated = 'Y'
                    domain_dns_data_array = self._get_ns_record(domain)
                    as_array = self._get_asn_array(domain_dns_data_array)
                else:
                    delegated = 'N'
                    domain_dns_data_array = {}
                    as_array = {}

                register_info = {
                    'registrant': re.sub(re_prefix, '', data[1]),
                    'register_date': re.sub(re_prefix, '', data[2]),
                    'register_end_date': re.sub(re_prefix, '', data[3]),
                    'free_date': re.sub(re_prefix, '', data[4]),
                    'delegated': delegated,
                    'domain': domain,
                    'prefix': domain_data['prefix'],
                }

                # Let the driver quote the domain instead of splicing it into
                # the statement.
                cursor.execute(
                    "SELECT id FROM domain WHERE domain_name = LOWER(%s)",
                    (domain,))
                domain_id = cursor.fetchone()

                if not domain_id:
                    run_sql = self._insert_domain(domain_dns_data_array,
                                                  as_array,
                                                  register_info)
                else:
                    run_sql = self._update_domain(domain_dns_data_array,
                                                  as_array,
                                                  domain_id['id'],
                                                  register_info)

                self.write_to_file(run_sql + ";", sql=True)

                try:
                    cursor.execute(run_sql)
                    self.connection.commit()
                except Exception:
                    self.write_to_file(
                        BColor.error("MySQL exceptions (SQL %s)" % run_sql))
                    self.write_to_file(BColor.error(traceback.format_exc()))

                    # Try again once on a fresh connection.
                    time.sleep(5)
                    self._connect_mysql()
                    cursor = self.connection.cursor(MySQLdb.cursors.DictCursor)
                    cursor.execute(run_sql)
                    self.connection.commit()

                added_domains += 1

                if (added_domains % 1000) == 0:
                    self.write_to_file(
                        BColor.process(
                            "Thread %d success resolved %d domains"
                            % (self.number, added_domains),
                            pid=self.number))

                # Drop the references held by this iteration
                # (see http://habrahabr.ru/post/178637/).
                data = None
                domain = None
                delegated = None
                domain_dns_data_array = None
                as_array = None
                register_info = None
                domain_id = None
                run_sql = None

            except Exception:
                # "except Exception" (not a bare except) so KeyboardInterrupt
                # and SystemExit can still stop the worker.
                data = domain_data['line'].split("\t")
                domain = re.sub(re_prefix, '', data[0])

                self.write_to_file(
                    BColor.error("Domain %s work failed process number %s"
                                 % (domain, self.number)))
                self.write_to_file(BColor.error(traceback.format_exc()))

        self.write_to_file(BColor.process("Process %s done " % self.number))
        self.connection.close()
        return 0
    except Exception:
        self.write_to_file(BColor.error("Process failed %s" % self.number))
        self.write_to_file(BColor.error(traceback.format_exc()))
        return 1
# -*- coding: utf-8 -*- from __future__ import unicode_literals __author__ = 'Alexey Y Manikin' import sys from config.main import * PROGRAM_NAME = 'update_statistic' CURRENT_DIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, CURRENT_DIR) logfile = os.path.join(CURRENT_DIR, '%s.debug' % PROGRAM_NAME) import traceback from helpers.helpersCollor import BColor from classes.statistic import Statistic if __name__ == "__main__": try: statistic = Statistic() statistic.update_all_statistic() except Exception as e: BColor.error("Got an exception: %s" % e.message) print(traceback.format_exc())
def run(self):
    """
    Request DNS data for every entry of ``self.domains`` and store it in MySQL.

    Each entry provides a tab separated ``line`` (domain, registrant,
    register date, register end date, free date, delegated flag) and a
    ``prefix``.  Delegated domains (flag ``'1'``) are resolved through
    ``self.get_ns_record`` / ``self._get_asn_array``; the others are stored
    with empty DNS data.  The RPKI status is always stored as -2 because the
    RPKI check is commented out.  A failing domain is logged and skipped.

    :return: 0 when the loop ran to completion, 1 when the worker failed
    """
    try:
        self.write_to_file(
            BColor.process("Process %s running, need work %s domains"
                           % (self.number, len(self.domains))))
        added_domains = 0
        # Matches every run of whitespace, so re.sub() strips all whitespace
        # from a field.
        re_prefix = re.compile(r'\s*')

        self._connect_mysql()
        cursor = self.connection.cursor(MySQLdb.cursors.DictCursor)

        # rpki = RpkiChecker()

        for domain_data in self.domains:
            try:
                data = domain_data['line'].split("\t")

                domain = re.sub(re_prefix, '', data[0])
                delegated = re.sub(re_prefix, '', data[5])

                if delegated == '1':
                    delegated = 'Y'
                    domain_dns_data_array = self.get_ns_record(domain)
                    as_array = self._get_asn_array(domain_dns_data_array)

                    # try:
                    #     status = rpki.check_ip(domain_dns_data_array['a'][0], as_array[0])
                    #     rpki_status = status['code']
                    # except:
                    #     rpki_status = -2
                    rpki_status = -2
                else:
                    delegated = 'N'
                    domain_dns_data_array = {}
                    as_array = {}
                    rpki_status = -2

                register_info = {
                    'registrant': re.sub(re_prefix, '', data[1]),
                    'register_date': re.sub(re_prefix, '', data[2]),
                    'register_end_date': re.sub(re_prefix, '', data[3]),
                    'free_date': re.sub(re_prefix, '', data[4]),
                    'delegated': delegated,
                    'domain': domain,
                    'prefix': domain_data['prefix'],
                }

                # Let the driver quote the domain instead of splicing it into
                # the statement.
                cursor.execute(
                    "SELECT id FROM domain WHERE domain_name = LOWER(%s)",
                    (domain,))
                domain_id = cursor.fetchone()

                if not domain_id:
                    run_sql = self._insert_domain(domain_dns_data_array,
                                                  as_array,
                                                  register_info,
                                                  rpki_status,
                                                  cursor)
                else:
                    run_sql = self._update_domain(domain_dns_data_array,
                                                  as_array,
                                                  domain_id['id'],
                                                  register_info,
                                                  rpki_status,
                                                  cursor)

                # NOTE(review): these replacements look like a workaround for
                # bytes reprs (b'...') ending up in the generated SQL; they
                # also rewrite every "b'" and "''" that is legitimate data --
                # confirm against _insert_domain/_update_domain.
                run_sql = run_sql.replace("b\'", '')
                run_sql = run_sql.replace("\'\'", '\'')

                self.write_to_file(run_sql + ";", sql=True)

                try:
                    cursor.execute(run_sql)
                    self.connection.commit()
                except Exception:
                    self.write_to_file(
                        BColor.error("MySQL exceptions (SQL %s)" % run_sql))
                    self.write_to_file(BColor.error(traceback.format_exc()))

                    # Try again once on a fresh connection.
                    time.sleep(5)
                    self._connect_mysql()
                    cursor = self.connection.cursor(MySQLdb.cursors.DictCursor)
                    cursor.execute(run_sql)
                    self.connection.commit()

                added_domains += 1

                if (added_domains % 1000) == 0:
                    self.write_to_file(
                        BColor.process(
                            "Thread %d success resolved %d domains"
                            % (self.number, added_domains),
                            pid=self.number))

                # Drop the references held by this iteration
                # (read http://habrahabr.ru/post/178637/).
                data = None
                domain = None
                delegated = None
                domain_dns_data_array = None
                as_array = None
                register_info = None
                domain_id = None
                run_sql = None

            except Exception:
                pprint.pprint(domain_data)
                data = domain_data['line'].split("\t")
                domain = re.sub(re_prefix, '', data[0])

                self.write_to_file(
                    BColor.error("Domain %s work failed process number %s"
                                 % (domain, self.number)))
                self.write_to_file(BColor.error(traceback.format_exc()))

        self.write_to_file(BColor.process("Process %s done " % self.number))
        self.connection.close()
        return 0
    except Exception:
        self.write_to_file(BColor.error("Process failed %s" % self.number))
        self.write_to_file(BColor.error(traceback.format_exc()))
        return 1
def load_prefix_list_from_var(prefix_list):
    """
    Load the data from a variable into a SubnetTree.

    Every key of ``prefix_list`` is stored in the tree together with its
    mapped value; both are passed through ``as_bytes`` first.

    :param prefix_list: mapping to load (presumably network prefixes as keys --
        confirm against the caller)
    :return: the populated ``SubnetTree.SubnetTree`` instance
    """
    subnet_list_tree = SubnetTree.SubnetTree()
    for index in prefix_list:
        subnet_list_tree[as_bytes(index)] = as_bytes(prefix_list[index])

    return subnet_list_tree


if __name__ == "__main__":
    try:
        # Refuse to start a second instance.
        if check_prog_run(PROGRAM_NAME):
            BColor.error("Program %s already running" % PROGRAM_NAME)
            sys.exit(1)

        # NOTE(review): the version= constructor argument is rejected by the
        # Python 3 argparse -- confirm the target interpreter.
        parser = argparse.ArgumentParser(add_help=True, version='1.0')

        parser.add_argument('-d', '--dir', type=str, help="Do`t download data, use exist from dir", action="store")
        parser.add_argument('-s', '--show_verbose', help="Show verbose log", action="count")
        parser.add_argument('-u', '--update_statistic', help="Update statistic after update domain", action="count")
        # NOTE(review): type=bool turns every non-empty value (including
        # "False") into True -- confirm this is intended.
        parser.add_argument('-D', '--delete_old', type=bool, help="Do`t delete removed domains", action="store")
        parser.add_argument('-n', '--name_server', type=str, help="Set name server", action="store")

        args = parser.parse_args()

        if args.show_verbose:
            BColor.ok("Use verbose")

        if not args.dir:
            # NOTE(review): the remainder of this script is not part of the
            # visible excerpt.
def load_prefix_list_from_var(prefix_list):
    """
    Load the data from a variable into a SubnetTree.

    Every key of ``prefix_list`` is stored in the tree together with its
    mapped value; both are passed through ``as_bytes`` first.

    :param prefix_list: mapping to load (presumably network prefixes as keys --
        confirm against the caller)
    :return: the populated ``SubnetTree.SubnetTree`` instance
    """
    subnet_list_tree = SubnetTree.SubnetTree()
    for index in prefix_list:
        subnet_list_tree[as_bytes(index)] = as_bytes(prefix_list[index])

    return subnet_list_tree


if __name__ == "__main__":
    try:
        # Refuse to start a second instance.
        if check_prog_run(PROGRAM_NAME):
            BColor.error("Program %s already running" % PROGRAM_NAME)
            sys.exit(1)

        # NOTE(review): the version= constructor argument is rejected by the
        # Python 3 argparse -- confirm the target interpreter.
        parser = argparse.ArgumentParser(add_help=True, version='1.0')

        parser.add_argument('-d', '--dir', type=str, help="Do`t download data, use exist from dir", action="store")
        parser.add_argument('-s', '--show_verbose', help="Show verbose log", action="count")
        # NOTE(review): type=bool turns every non-empty value (including
        # "False") into True -- confirm this is intended.
        parser.add_argument('-D', '--delete_old', type=bool, help="Do`t delete removed domains", action="store")
        parser.add_argument('-n', '--name_server', type=str, help="Set name server", action="store")

        args = parser.parse_args()

        if args.show_verbose:
            BColor.ok("Use verbose")

        if not args.dir:
            BColor.process("Download files")
            # NOTE(review): the remainder of this script is not part of the
            # visible excerpt.
def run(self):
    """
    Request DNS data for the domains taken from ``self.queue`` and store it.

    Items are taken from the queue until it reports empty.  Each item
    provides a tab separated ``line`` (domain, registrant, register date,
    register end date, free date, delegated flag) and a ``prefix``.
    Delegated domains (flag ``'1'``) are resolved through
    ``self.get_ns_record`` / ``self._get_asn_array``; the RPKI status is
    always stored as -2 because the RPKI check is commented out.  A failing
    domain is logged and skipped.

    When the worker finishes, a dict with the keys ``time_diff`` (elapsed
    whole seconds), ``performance`` (seconds per processed domain, 0.0 when
    nothing was processed) and ``count`` is put on ``self.queue_statistic``.

    :return: 0 when the queue was drained, 1 when the worker failed
    """
    self.write_to_file(BColor.process("Process %s running" % self.number))
    added_domains = 0
    # Matches every run of whitespace, so re.sub() strips all whitespace from
    # a field.
    re_prefix = re.compile(r'\s*')
    start_time = datetime.now()

    def publish_statistic():
        # Publish the counters of this worker and return the performance.
        diff = datetime.now() - start_time
        # total_seconds() also counts whole days, which .seconds drops.
        elapsed = int(diff.total_seconds())
        # Guard against ZeroDivisionError when no domain was processed.
        performance = elapsed / added_domains if added_domains else 0.0
        self.queue_statistic.put({
            'time_diff': elapsed,
            'performance': performance,
            'count': added_domains
        })
        return performance

    try:
        self._connect_mysql()
        cursor = self.connection.cursor(MySQLdb.cursors.DictCursor)

        # rpki = RpkiChecker()

        while not self.queue.empty():
            domain_data = self.queue.get(timeout=5)
            try:
                data = domain_data['line'].split("\t")

                domain = re.sub(re_prefix, '', data[0])
                delegated = re.sub(re_prefix, '', data[5])

                if delegated == '1':
                    delegated = 'Y'
                    domain_dns_data_array = self.get_ns_record(domain)
                    as_array = self._get_asn_array(domain_dns_data_array)

                    # try:
                    #     status = rpki.check_ip(domain_dns_data_array['a'][0], as_array[0])
                    #     rpki_status = status['code']
                    # except:
                    #     rpki_status = -2
                    rpki_status = -2
                else:
                    delegated = 'N'
                    domain_dns_data_array = {}
                    as_array = {}
                    rpki_status = -2

                register_info = {
                    'registrant': re.sub(re_prefix, '', data[1]),
                    'register_date': re.sub(re_prefix, '', data[2]),
                    'register_end_date': re.sub(re_prefix, '', data[3]),
                    'free_date': re.sub(re_prefix, '', data[4]),
                    'delegated': delegated,
                    'domain': domain,
                    'prefix': domain_data['prefix']
                }

                run_sql = self._update_domain_row(domain_dns_data_array,
                                                  as_array,
                                                  register_info,
                                                  rpki_status)

                # NOTE(review): these replacements look like a workaround for
                # bytes reprs (b'...') ending up in the generated SQL; they
                # also rewrite every "b'" and "''" that is legitimate data --
                # confirm against _update_domain_row.
                run_sql = run_sql.replace("b\'", '')
                run_sql = run_sql.replace("\'\'", '\'')

                self.write_to_file(run_sql + ";", sql=True)

                try:
                    cursor.execute(run_sql)
                    self.connection.commit()
                except Exception:
                    self.write_to_file(
                        BColor.error("MySQL exceptions (SQL %s)" % run_sql))
                    self.write_to_file(BColor.error(traceback.format_exc()))

                    # Try again once on a fresh connection.
                    time.sleep(5)
                    self._connect_mysql()
                    cursor = self.connection.cursor(MySQLdb.cursors.DictCursor)
                    cursor.execute(run_sql)
                    self.connection.commit()

                added_domains += 1

                # Drop the references held by this iteration
                # (read http://habrahabr.ru/post/178637/).
                data = None
                domain = None
                delegated = None
                domain_dns_data_array = None
                as_array = None
                register_info = None
                run_sql = None

            except Exception:
                data = domain_data['line'].split("\t")
                domain = re.sub(re_prefix, '', data[0])

                self.write_to_file(
                    BColor.error("Domain %s work failed process number %i"
                                 % (domain, self.number)))
                self.write_to_file(BColor.error(traceback.format_exc()))

        performance = publish_statistic()
        self.write_to_file(
            BColor.process(
                "Process %i done, processed %i domain (performance %f)"
                % (self.number, added_domains, performance),
                pid=self.number))
        self.connection.close()
        return 0
    except queue.Empty:
        # queue.get() timed out: another worker emptied the queue between the
        # empty() check and the get() call.
        performance = publish_statistic()
        self.write_to_file(
            BColor.process(
                "Process %i done queue is Empty = %i, processed %i domain (performance %f)"
                % (self.number, self.queue.empty(), added_domains, performance),
                pid=self.number))
        return 0
    except Exception:
        self.write_to_file(
            BColor.error("Process failed %i" % self.number, pid=self.number))
        self.write_to_file(BColor.error(traceback.format_exc()))
        return 1
def load_prefix_list_from_var(prefix_list):
    """
    Load the data from a variable into a SubnetTree.

    Every key of ``prefix_list`` is stored in the tree together with its
    mapped value.

    :param prefix_list: mapping to load (presumably network prefixes as keys --
        confirm against the caller)
    :return: the populated ``SubnetTree.SubnetTree`` instance
    """
    subnet_list_tree = SubnetTree.SubnetTree()
    for index in prefix_list:
        subnet_list_tree[index] = prefix_list[index]

    return subnet_list_tree


if __name__ == "__main__":
    try:
        # Refuse to start a second instance.
        if check_program_run(PROGRAM_NAME):
            BColor.error("Program %s already running" % PROGRAM_NAME)
            sys.exit(1)

        parser = argparse.ArgumentParser(add_help=True)

        parser.add_argument('-d', '--dir', type=str, help="Do`t download data, use exist from dir", action="store")
        parser.add_argument('-s', '--show_verbose', help="Show verbose log", action="count")
        # NOTE(review): the visible excerpt ends in the middle of this call.
        parser.add_argument('-u', '--update_statistic',