Пример #1
0
    def download_data_for_current_date():
        """
        Download every file the parser needs and return the data directory.

        Zone files for the local TLDs come from R01:
        https://partner.r01.ru/zones/ru_domains.gz
        https://partner.r01.ru/zones/su_domains.gz
        https://partner.r01.ru/zones/rf_domains.gz

        The BGP full view comes from http://archive.routeviews.org; the
        procedure is described in detail in Pavel's blog:
        http://phpsuxx.blogspot.com/2011/12/full-bgp.html
        http://phpsuxx.blogspot.com/2011/12/libbgpdump-debian-6-squeeze.html

        Zone files for other TLDs can be found at
        http://csa.ee/databases-zone-files/

        :return: the directory returned by ``Downloader.create_data_dir()``
        :rtype: unicode
        :raises Exception: when a download reports failure or leaves a
            missing or empty file behind
        """
        # The RIB snapshot is requested for yesterday's date (06:00 dump).
        rib_date = datetime.date.today() - datetime.timedelta(days=1)

        files_list = [{
            'url': 'https://partner.r01.ru/zones/ru_domains.gz',
            'file_name': 'ru_domains.gz'
        }, {
            'url': 'https://partner.r01.ru/zones/su_domains.gz',
            'file_name': 'su_domains.gz'
        }, {
            'url': 'https://partner.r01.ru/zones/rf_domains.gz',
            'file_name': 'rf_domains.gz'
        }, {
            'url':
            'http://archive.routeviews.org/bgpdata/%s/RIBS/rib.%s.0600.bz2' %
            (rib_date.strftime("%Y.%m"), rib_date.strftime("%Y%m%d")),
            'file_name':
            'rib.bz2'
        }]

        path = Downloader.create_data_dir()

        for item in files_list:
            url = item['url']
            path_file = os.path.abspath(os.path.join(path, item['file_name']))
            BColor.process("Download %s to %s " % (url, path_file))

            # Drop whatever a previous run left behind. shutil.rmtree() only
            # removes directories, so on a regular file it failed silently
            # and a stale copy could pass the size check below.
            if os.path.isdir(path_file):
                shutil.rmtree(path_file, ignore_errors=True)
            else:
                try:
                    os.remove(path_file)
                except OSError:
                    # Nothing to delete, or the file is not removable; the
                    # checks below still reject a failed download.
                    pass

            downloaded = Downloader.download_file(url, path_file)
            # `is False` (not plain falsiness) so that only an explicit
            # failure result is treated as an error.
            if (downloaded is False or not os.path.isfile(path_file)
                    or os.path.getsize(path_file) == 0):
                message = "Can`t download file %s to %s" % (url, path_file)
                BColor.error(message)
                raise Exception(message)

        return path
Пример #2
0
    def download_data_for_current_date() -> str:
        """
        Download every file the parser needs, in parallel, and return the
        data directory.

        The BGP full view comes from http://archive.routeviews.org; the
        procedure is described in detail in Pavel's blog:
        http://phpsuxx.blogspot.com/2011/12/full-bgp.html
        http://phpsuxx.blogspot.com/2011/12/libbgpdump-debian-6-squeeze.html

        Zone files for other TLDs can be found at
        http://csa.ee/databases-zone-files/

        :return: the directory returned by ``Downloader.create_data_dir()``
        """
        # The RIB snapshot is requested for yesterday's date (06:00 dump).
        yesterday = datetime.date.today() - datetime.timedelta(days=1)
        rib_url = (
            'http://archive.routeviews.org/bgpdata/%s/RIBS/rib.%s.0600.bz2' %
            (yesterday.strftime("%Y.%m"), yesterday.strftime("%Y%m%d")))

        sources = (
            ('https://ru-tld.ru/files/RU_Domains_ru-tld.ru.gz',
             'ru_domains.gz'),
            ('https://ru-tld.ru/files/SU_Domains_ru-tld.ru.gz',
             'su_domains.gz'),
            ('https://ru-tld.ru/files/RF_Domains_ru-tld.ru.gz',
             'rf_domains.gz'),
            (rib_url, 'rib.bz2'),
        )
        files_list = [{'url': source_url, 'file_name': target_name}
                      for source_url, target_name in sources]

        path = Downloader.create_data_dir()

        # One worker thread per file, so all downloads start immediately.
        pool = concurrent.futures.ThreadPoolExecutor(
            max_workers=len(files_list))
        with pool:
            pending = {}
            for entry in files_list:
                pending[pool.submit(Downloader.download, path, entry)] = entry

            # as_completed() raises if the whole batch takes over 1800 s.
            finished = concurrent.futures.as_completed(pending, timeout=1800)
            for done in finished:
                entry = pending[done]
                # result() re-raises whatever the download raised.
                size = done.result()
                BColor.ok("Download url %s to %s, size is %i" %
                          (entry['url'], entry['file_name'], size))

        return path
Пример #3
0
    def _normalization_delete_record(self):
        """
        Нормализация удаленных и вновь добавленных доменов. То есть если домен был удален и зарегистрирован,
        у него должна быть одна история
        :return:
        """
        cursor = self.connection.cursor(MySQLdb.cursors.DictCursor)

        sql = """SELECT DISTINCT domain_id AS domain_id FROM domain_history
WHERE domain_id NOT IN (SELECT id FROM domain)"""
        cursor.execute(sql)
        data = cursor.fetchall()

        count_deleted_domain = len(data)
        current_domain = 0
        count_not_update = 0
        count_update = 0

        if self.show_log:
            BColor.ok("All deleted domain is %s" % count_deleted_domain)

        for row in data:
            if current_domain % 10000 == 1:
                if self.show_log:
                    updated_percent = round(count_update /
                                            (current_domain / 100))
                    BColor.process(
                        "Current domain %s/%s (updated %s percent)" %
                        (current_domain, count_deleted_domain,
                         updated_percent))

                self.connection.commit()

            sql = "SELECT DISTINCT domain_name FROM domain_history WHERE domain_id = %s" % (
                row['domain_id'])
            cursor.execute(sql)
            domain_history = cursor.fetchone()

            sql = "SELECT id FROM domain WHERE domain_name = '%s'" % (
                domain_history['domain_name'])
            cursor.execute(sql)
            domain = cursor.fetchone()

            if domain:
                sql_update = "UPDATE domain_history SET domain_id = %s WHERE domain_id = %s" % (
                    domain['id'], row['domain_id'])
                cursor.execute(sql_update)
                count_update += 1
            else:
                count_not_update += 1

            current_domain += 1
Пример #4
0
    def download(path: str, item: dict) -> int:
        """
        Download the single file described by ``item`` into ``path``.

        :param path: directory the file is saved in
        :param item: mapping with the keys ``'url'`` and ``'file_name'``
        :return: size of the downloaded file in bytes
        :raises Exception: when the download reports failure or leaves a
            missing or empty file behind
        """
        file_name = item['file_name']
        url = item['url']
        path_file = os.path.abspath(os.path.join(path, file_name))

        BColor.process("Download %s to %s " % (url, path_file))

        # Drop whatever a previous run left behind. shutil.rmtree() only
        # removes directories, so on a regular file it failed silently and
        # a stale copy could pass the size check below.
        if os.path.isdir(path_file):
            shutil.rmtree(path_file, ignore_errors=True)
        else:
            try:
                os.remove(path_file)
            except OSError:
                # Nothing to delete, or the file is not removable; the
                # checks below still reject a failed download.
                pass

        downloaded = Downloader.download_file(url, path_file)
        size = os.path.getsize(path_file) if os.path.isfile(path_file) else 0
        # `is False` (not plain falsiness) so that only an explicit failure
        # result is treated as an error.
        if downloaded is False or size == 0:
            message = "Can`t download file %s to %s" % (url, path_file)
            BColor.error(message)
            raise Exception(message)

        return size
Пример #5
0
    def unzip_file(path_file):
        """
        Decompress ``path_file`` by running the command built by ``Gunzip``.

        :type path_file: unicode
        :rtype: bool
        :return: True when the process exits with code 0, otherwise False
        """
        runner = SubprocessRunner(command=Gunzip(path_file).get_command())
        runner.run()
        runner.wait(write_output_in_log=False)

        exit_code = runner.process.returncode
        if exit_code == 0:
            return True

        BColor.error("unzip p.process.returncode = %s" % exit_code)
        return False
Пример #6
0
    def unzip_file(path_file: str) -> bool:
        """
        Decompress ``path_file`` by running the command built by ``Gunzip``.

        :param path_file: path of the archive handed to ``Gunzip``
        :return: True when the process exits with code 0, otherwise False
        """
        unpack_command = Gunzip(path_file).get_command()

        runner = SubprocessRunner(command=unpack_command)
        runner.run()
        runner.wait(write_output_in_log=False)

        succeeded = runner.process.returncode == 0
        if not succeeded:
            BColor.error("unzip p.process.returncode = %s" %
                         runner.process.returncode)
        return succeeded
Пример #7
0
    def update_all_statistic(self):
        """
        Refresh every statistic and wait for the tracked worker processes.

        Runs the individual ``update_*`` methods, joins everything found in
        ``self.process_list``, then runs the NS grouping step and joins
        again. Returns early, without the final log line, when a join is
        interrupted with Ctrl+C.

        :return: None
        """
        start_time = datetime.datetime.now()

        # 1728000 s is 20 days. NOTE(review): the original comment said
        # "timeout 2 days" (172800 s) - confirm which value was intended.
        join_timeout = 1728000

        def wait_for_processes():
            """Join and drop every tracked process; False if interrupted."""
            # Always take the head of the list instead of iterating it:
            # removing items from a list while a for-loop walks it makes the
            # loop skip every other element, leaving half the processes
            # unjoined.
            while self.process_list:
                process = self.process_list[0]
                try:
                    process.join(join_timeout)
                    self.process_list.remove(process)
                except KeyboardInterrupt:
                    return False
            return True

        self.update_as_count_statistic()
        self.update_a_domain_old_count_statistic()
        self.update_ns_domain_old_count_statistic()
        self.update_as_domain_old_count_statistic()
        self.update_registrant_count_statistic()
        self.update_ns_count_statistic()
        self.update_mx_count_statistic()
        self.update_domain_count_statistic()
        self.update_a_count_statistic()
        self.update_cname_count_statistic()

        # beget statistic
        self.update_beget_statistic()

        self.update_provider_statistic('netangels', 44128)
        self.update_provider_statistic('timeweb', 9123)

        if not wait_for_processes():
            return

        # The NS statistic must be fully collected before it can be grouped,
        # so the grouping step runs only after the first wait.
        self.update_ns_domain_group_count_statistic()
        if not wait_for_processes():
            return

        diff = datetime.datetime.now() - start_time
        # total_seconds() includes whole days; .seconds would drop them.
        BColor.process("Statistic done to %i second" % diff.total_seconds())
Пример #8
0
    def delete_not_updated_today(count_all_domain=False):
        """
        Delete domains that were not loaded today and reset ``load_today``.

        :type count_all_domain: bool|dict
        :param count_all_domain: falsy to process the whole ``domain`` table;
            otherwise a mapping of zone key to the number of domains found in
            that zone's file, in which case only the TLDs
            ``PREFIX_LIST_ZONE[key]`` are processed. The count is used for
            logging only.
        :return: None
        """
        connection = get_mysql_connection()
        # try/finally so the connection is released even when a statement
        # fails; nothing is committed in that case.
        try:
            cursor = connection.cursor(MySQLdb.cursors.DictCursor)
            sql_trigger_enable = "SET @TRIGGER_DISABLED = 0"
            sql_trigger_disable = "SET @TRIGGER_DISABLED = 1"

            if not count_all_domain:
                sql = "DELETE FROM domain WHERE load_today = 'N'"
                BColor.process(sql)
                cursor.execute(sql)

                # @TRIGGER_DISABLED is set only around the flag reset.
                # NOTE(review): presumably the table triggers check this
                # variable - confirm against the schema.
                cursor.execute(sql_trigger_disable)
                sql = "UPDATE domain SET load_today = 'N'"
                BColor.process(sql)
                cursor.execute(sql)
                cursor.execute(sql_trigger_enable)
            else:
                for key_tld, tld_count_in_file in count_all_domain.items():
                    # The TLD comes from the PREFIX_LIST_ZONE constant and is
                    # formatted into the SQL text, which is also logged.
                    tld = PREFIX_LIST_ZONE[key_tld]

                    cursor.execute(
                        "SELECT count(*) as domain_count FROM domain WHERE tld = '%s'"
                        % tld)
                    count_in_base = cursor.fetchone()
                    BColor.process("Count zone (%s) in file %s, in base %s" %
                                   (str(key_tld), str(tld_count_in_file),
                                    str(count_in_base['domain_count'])))

                    sql = "DELETE FROM domain WHERE load_today = 'N' AND tld = '%s'" % tld
                    BColor.process(sql)
                    cursor.execute(sql)

                    cursor.execute(sql_trigger_disable)
                    sql = "UPDATE domain SET load_today = 'N' WHERE tld = '%s'" % tld
                    BColor.process(sql)
                    cursor.execute(sql)
                    cursor.execute(sql_trigger_enable)

            connection.commit()
        finally:
            connection.close()
Пример #9
0
    def download_file(url, data_dir):
        """
        Fetch ``url`` by running the command built by ``Wget(url, data_dir)``.

        :type url: unicode
        :type data_dir: unicode
        :rtype: bool
        :return: True when the process exits with code 0, otherwise False
        """
        runner = SubprocessRunner(command=Wget(url, data_dir).get_command())
        runner.run()
        runner.wait(write_output_in_log=False)

        exit_code = runner.process.returncode
        if exit_code == 0:
            return True

        BColor.error("wget p.process.returncode = %s" % exit_code)
        return False
Пример #10
0
    def download_file(url, data_dir):
        """
        Fetch ``url`` by running the command built by ``Wget(url, data_dir)``.

        :type url: unicode
        :type data_dir: unicode
        :rtype: bool
        :return: True when the process exits with code 0, otherwise False
        """
        fetch_command = Wget(url, data_dir).get_command()

        runner = SubprocessRunner(command=fetch_command)
        runner.run()
        runner.wait(write_output_in_log=False)

        succeeded = runner.process.returncode == 0
        if not succeeded:
            BColor.error("wget p.process.returncode = %s" %
                         runner.process.returncode)
        return succeeded
Пример #11
0
    def delete_not_updated_today():
        """
        Delete ``rpki`` rows that were not loaded today and reset the
        ``load_today`` flag on the remaining rows.

        :return: None
        """
        connection = get_mysql_connection()
        # try/finally so the connection is released even when a statement
        # fails; nothing is committed in that case.
        try:
            cursor = connection.cursor(MySQLdb.cursors.DictCursor)
            sql_trigger_enable = "SET @TRIGGER_DISABLED = 0"
            sql_trigger_disable = "SET @TRIGGER_DISABLED = 1"

            sql = "DELETE FROM rpki WHERE load_today = 'N'"
            BColor.process(sql)
            cursor.execute(sql)

            # @TRIGGER_DISABLED is set only around the flag reset.
            # NOTE(review): presumably the table triggers check this
            # variable - confirm against the schema.
            cursor.execute(sql_trigger_disable)
            sql = "UPDATE rpki SET load_today = 'N'"
            BColor.process(sql)
            cursor.execute(sql)
            cursor.execute(sql_trigger_enable)

            connection.commit()
        finally:
            connection.close()
Пример #12
0
    def download_data_for_current_date():
        """
        Download every file the parser needs and return the data directory.

        Zone files for the local TLDs come from R01:
        https://partner.r01.ru/zones/ru_domains.gz
        https://partner.r01.ru/zones/su_domains.gz
        https://partner.r01.ru/zones/rf_domains.gz

        The BGP full view comes from http://archive.routeviews.org; the
        procedure is described in detail in Pavel's blog:
        http://phpsuxx.blogspot.com/2011/12/full-bgp.html
        http://phpsuxx.blogspot.com/2011/12/libbgpdump-debian-6-squeeze.html

        Zone files for other TLDs can be found at
        http://csa.ee/databases-zone-files/

        :return: the directory returned by ``Downloader.create_data_dir()``
        :rtype: unicode
        :raises Exception: when a download reports failure or leaves a
            missing or empty file behind
        """
        # The RIB snapshot is requested for yesterday's date (06:00 dump).
        rib_date = datetime.date.today() - datetime.timedelta(days=1)

        files_list = [{'url': 'https://partner.r01.ru/zones/ru_domains.gz', 'file_name': 'ru_domains.gz'},
                      {'url': 'https://partner.r01.ru/zones/su_domains.gz', 'file_name': 'su_domains.gz'},
                      {'url': 'https://partner.r01.ru/zones/rf_domains.gz', 'file_name': 'rf_domains.gz'},
                      {'url': 'http://archive.routeviews.org/bgpdata/%s/RIBS/rib.%s.0600.bz2'
                              % (rib_date.strftime("%Y.%m"), rib_date.strftime("%Y%m%d")), 'file_name': 'rib.bz2'}]

        path = Downloader.create_data_dir()

        for item in files_list:
            url = item['url']
            path_file = os.path.abspath(os.path.join(path, item['file_name']))
            BColor.process("Download %s to %s " % (url, path_file))

            # Drop whatever a previous run left behind. shutil.rmtree() only
            # removes directories, so on a regular file it failed silently
            # and a stale copy could pass the size check below.
            if os.path.isdir(path_file):
                shutil.rmtree(path_file, ignore_errors=True)
            else:
                try:
                    os.remove(path_file)
                except OSError:
                    # Nothing to delete, or the file is not removable; the
                    # checks below still reject a failed download.
                    pass

            downloaded = Downloader.download_file(url, path_file)
            # `is False` (not plain falsiness) so that only an explicit
            # failure result is treated as an error.
            if downloaded is False or not os.path.isfile(path_file) or os.path.getsize(path_file) == 0:
                message = "Can`t download file %s to %s" % (url, path_file)
                BColor.error(message)
                raise Exception(message)

        return path
Пример #13
0
    def run(self):
        """
        Resolve DNS data for queued domains and store it in MySQL.

        Items are taken from ``self.queue`` until it is empty. Each item is
        a mapping with a tab-separated ``'line'`` (domain, registrant,
        register date, register end date, free date, delegated flag) and a
        ``'prefix'``. A failure on one domain is logged and the loop moves
        on. When the queue is drained, the worker's totals are put on
        ``self.queue_statistic``.

        :return: 0 when the queue was drained, 1 on an unexpected error
        """

        self.write_to_file(BColor.process("Process %s running" % self.number))
        added_domains = 0
        re_prefix = re.compile(r'\s*')
        start_time = datetime.now()

        def publish_statistic():
            """Put this worker's totals on queue_statistic; return seconds per domain."""
            # total_seconds() includes whole days; .seconds would drop them.
            elapsed = int((datetime.now() - start_time).total_seconds())
            # A worker that processed nothing reports 0.0 instead of raising
            # ZeroDivisionError.
            performance = elapsed / added_domains if added_domains else 0.0
            self.queue_statistic.put({
                'time_diff': elapsed,
                'performance': performance,
                'count': added_domains
            })
            return performance

        try:
            self._connect_mysql()
            cursor = self.connection.cursor(MySQLdb.cursors.DictCursor)

            while not self.queue.empty():
                # Another worker may empty the queue between the check and
                # the get; the resulting queue.Empty is handled below.
                domain_data = self.queue.get(timeout=5)
                try:
                    data = domain_data['line'].split("\t")

                    # Every field is stripped of all whitespace.
                    domain = re.sub(re_prefix, '', data[0])
                    delegated = re.sub(re_prefix, '', data[5])

                    if delegated == '1':
                        delegated = 'Y'
                        domain_dns_data_array = self.get_ns_record(domain)
                        as_array = self._get_asn_array(domain_dns_data_array)
                    else:
                        delegated = 'N'
                        domain_dns_data_array = {}
                        as_array = {}

                    # RPKI validation (RpkiChecker.check_ip on the first A
                    # record and ASN) is switched off; every row is stored
                    # with status -2.
                    rpki_status = -2

                    register_info = {
                        'registrant': re.sub(re_prefix, '', data[1]),
                        'register_date': re.sub(re_prefix, '', data[2]),
                        'register_end_date': re.sub(re_prefix, '', data[3]),
                        'free_date': re.sub(re_prefix, '', data[4]),
                        'delegated': delegated,
                        'domain': domain,
                        'prefix': domain_data['prefix']
                    }

                    run_sql = self._update_domain_row(domain_dns_data_array,
                                                      as_array, register_info,
                                                      rpki_status)

                    # NOTE(review): presumably cleans up bytes-literal
                    # prefixes and doubled quotes left in the generated SQL
                    # - confirm against _update_domain_row.
                    run_sql = run_sql.replace("b\'", '')
                    run_sql = run_sql.replace("\'\'", '\'')

                    self.write_to_file(run_sql + ";", sql=True)

                    try:
                        cursor.execute(run_sql)
                        self.connection.commit()
                    except Exception:
                        self.write_to_file(
                            BColor.error("MySQL exceptions (SQL %s)" %
                                         run_sql))
                        self.write_to_file(BColor.error(
                            traceback.format_exc()))

                        # Retry once on a fresh connection after a pause.
                        time.sleep(5)
                        self._connect_mysql()
                        cursor = self.connection.cursor(
                            MySQLdb.cursors.DictCursor)
                        cursor.execute(run_sql)
                        self.connection.commit()

                    added_domains += 1

                    # Drop the references explicitly so the per-domain
                    # objects can be freed before the next iteration
                    # (see http://habrahabr.ru/post/178637/).
                    data = None
                    domain = None
                    delegated = None
                    domain_dns_data_array = None
                    as_array = None
                    register_info = None
                    run_sql = None

                except Exception:
                    # Re-parse the name: the failure may have happened
                    # before `domain` was assigned for this item.
                    data = domain_data['line'].split("\t")
                    domain = re.sub(re_prefix, '', data[0])

                    self.write_to_file(
                        BColor.error(
                            "Domain %s work failed process number %i" %
                            (domain, self.number)))
                    self.write_to_file(BColor.error(traceback.format_exc()))

            performance = publish_statistic()
            self.write_to_file(
                BColor.process(
                    "Process %i done, processed %i domain (performance %f)" %
                    (self.number, added_domains, performance),
                    pid=self.number))
            self.connection.close()
            return 0
        except queue.Empty:
            performance = publish_statistic()
            self.write_to_file(
                BColor.process(
                    "Process %i done queue is Empty = %i, processed %i domain (performance %f)"
                    % (self.number, self.queue.empty(), added_domains,
                       performance),
                    pid=self.number))
            return 0
        except Exception:
            self.write_to_file(
                BColor.error("Process failed %i" % self.number,
                             pid=self.number))
            self.write_to_file(BColor.error(traceback.format_exc()))
            return 1
def load_prefix_list_from_var(prefix_list):
    """
    Build a ``SubnetTree`` from an in-memory mapping.

    :param prefix_list: mapping whose keys and values are both passed
        through ``as_bytes`` before being stored in the tree
    :return: the populated ``SubnetTree.SubnetTree``
    """
    tree = SubnetTree.SubnetTree()
    for prefix, value in prefix_list.items():
        tree[as_bytes(prefix)] = as_bytes(value)

    return tree


if __name__ == "__main__":
    try:
        if check_prog_run(PROGRAM_NAME):
            BColor.error("Program %s already running" % PROGRAM_NAME)
            sys.exit(1)

        parser = argparse.ArgumentParser(add_help=True, version='1.0')

        parser.add_argument('-d', '--dir', type=str, help="Do`t download data, use exist from dir", action="store")
        parser.add_argument('-s', '--show_verbose', help="Show verbose log", action="count")
        parser.add_argument('-u', '--update_statistic', help="Update statistic after update domain", action="count")
        parser.add_argument('-D', '--delete_old', type=bool, help="Do`t delete removed domains", action="store")
        parser.add_argument('-n', '--name_server', type=str, help="Set name server", action="store")
        args = parser.parse_args()

        if args.show_verbose:
            BColor.ok("Use verbose")

        if not args.dir:
Пример #15
0
def load_prefix_list_from_var(prefix_list):
    """
    Build a ``SubnetTree`` from an in-memory mapping.

    :param prefix_list: mapping whose keys and values are both passed
        through ``as_bytes`` before being stored in the tree
    :return: the populated ``SubnetTree.SubnetTree``
    """
    tree = SubnetTree.SubnetTree()
    for prefix, value in prefix_list.items():
        tree[as_bytes(prefix)] = as_bytes(value)

    return tree


if __name__ == "__main__":
    try:
        if check_prog_run(PROGRAM_NAME):
            BColor.error("Program %s already running" % PROGRAM_NAME)
            sys.exit(1)

        parser = argparse.ArgumentParser(add_help=True, version='1.0')

        parser.add_argument('-d', '--dir', type=str, help="Do`t download data, use exist from dir", action="store")
        parser.add_argument('-s', '--show_verbose', help="Show verbose log", action="count")
        parser.add_argument('-D', '--delete_old', type=bool, help="Do`t delete removed domains", action="store")
        parser.add_argument('-n', '--name_server', type=str, help="Set name server", action="store")
        args = parser.parse_args()

        if args.show_verbose:
            BColor.ok("Use verbose")

        if not args.dir:
            BColor.process("Download files")
Пример #16
0
    def run(self):
        """
        Resolve DNS data for ``self.domains`` and store it in MySQL.

        Each item is a mapping with a tab-separated ``'line'`` (domain,
        registrant, register date, register end date, free date, delegated
        flag) and a ``'prefix'``. A domain is inserted when it has no row in
        ``domain`` yet and updated otherwise. A failure on one domain is
        logged and the loop moves on.

        :return: 0 when the list was processed, 1 on an unexpected error
        """

        try:
            self.write_to_file(BColor.process("Process %s running, need work %s domains"
                                              % (self.number, len(self.domains))))

            added_domains = 0
            re_prefix = re.compile(r'\s*')
            self._connect_mysql()
            cursor = self.connection.cursor(MySQLdb.cursors.DictCursor)

            for domain_data in self.domains:
                try:
                    data = domain_data['line'].split("\t")

                    # Every field is stripped of all whitespace.
                    domain = re.sub(re_prefix, '', data[0])
                    delegated = re.sub(re_prefix, '', data[5])

                    if delegated == '1':
                        delegated = 'Y'
                        domain_dns_data_array = self._get_ns_record(domain)
                        as_array = self._get_asn_array(domain_dns_data_array)
                    else:
                        delegated = 'N'
                        domain_dns_data_array = {}
                        as_array = {}

                    register_info = {'registrant': re.sub(re_prefix, '', data[1]),
                                     'register_date': re.sub(re_prefix, '', data[2]),
                                     'register_end_date': re.sub(re_prefix, '', data[3]),
                                     'free_date': re.sub(re_prefix, '', data[4]),
                                     'delegated': delegated,
                                     'domain': domain,
                                     'prefix': domain_data['prefix']}

                    # The name comes from the input file, so it is passed as
                    # a query parameter and escaped by the driver.
                    cursor.execute("SELECT id FROM domain WHERE domain_name = LOWER(%s)", (domain,))
                    domain_id = cursor.fetchone()

                    if not domain_id:
                        run_sql = self._insert_domain(domain_dns_data_array, as_array, register_info)
                    else:
                        run_sql = self._update_domain(domain_dns_data_array, as_array, domain_id['id'],
                                                      register_info)

                    self.write_to_file(run_sql + ";", sql=True)

                    try:
                        cursor.execute(run_sql)
                        self.connection.commit()
                    except Exception:
                        self.write_to_file(BColor.error("MySQL exceptions (SQL %s)" % run_sql))
                        self.write_to_file(BColor.error(traceback.format_exc()))

                        # Retry once on a fresh connection after a pause.
                        time.sleep(5)
                        self._connect_mysql()
                        cursor = self.connection.cursor(MySQLdb.cursors.DictCursor)
                        cursor.execute(run_sql)
                        self.connection.commit()

                    added_domains += 1

                    if (added_domains % 1000) == 0:
                        self.write_to_file(BColor.process("Thread %d success resolved %d domains"
                                                          % (self.number, added_domains), pid=self.number))

                    # Drop the references explicitly so the per-domain
                    # objects can be freed before the next iteration
                    # (see http://habrahabr.ru/post/178637/).
                    data = None
                    domain = None
                    delegated = None
                    domain_dns_data_array = None
                    as_array = None
                    register_info = None
                    domain_id = None
                    run_sql = None

                # `except Exception` rather than a bare `except`, so that
                # KeyboardInterrupt/SystemExit are not swallowed per domain.
                except Exception:
                    # Re-parse the name: the failure may have happened
                    # before `domain` was assigned for this item.
                    data = domain_data['line'].split("\t")
                    domain = re.sub(re_prefix, '', data[0])

                    self.write_to_file(BColor.error("Domain %s work failed process number %s" % (domain, self.number)))
                    self.write_to_file(BColor.error(traceback.format_exc()))

            self.write_to_file(BColor.process("Process %s done " % self.number))
            self.connection.close()
            return 0
        except Exception:
            self.write_to_file(BColor.error("Process failed %s" % self.number))
            self.write_to_file(BColor.error(traceback.format_exc()))
            return 1
Пример #17
0
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

__author__ = 'Alexey Y Manikin'

import sys
from config.main import *

PROGRAM_NAME = 'update_statistic'
CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))

sys.path.insert(0, CURRENT_DIR)
logfile = os.path.join(CURRENT_DIR, '%s.debug' % PROGRAM_NAME)

import traceback
from helpers.helpersCollor import BColor
from classes.statistic import Statistic

if __name__ == "__main__":
    # Script entry point: recompute all statistics and report any failure
    # together with its traceback.
    try:
        statistic = Statistic()
        statistic.update_all_statistic()
    except Exception as e:
        # Exceptions have no ``message`` attribute on Python 3, so format the
        # exception object itself; otherwise this handler would raise
        # AttributeError and hide the real error.
        BColor.error("Got an exception: %s" % e)
        print(traceback.format_exc())
Пример #18
0
    def run(self):
        """
        Request DNS data for this worker's domains and store it in MySQL.

        Every item of ``self.domains`` is a dict with a tab-separated
        ``'line'`` and a ``'prefix'``.  Columns 0-5 of the line are read as
        domain, registrant, register date, register end date, free date and
        the delegated flag.  Domains whose flag is ``'1'`` are resolved with
        ``get_ns_record`` / ``_get_asn_array``; afterwards the ``domain`` row
        is inserted or updated.  A failure on a single domain is logged and
        the loop continues with the next one.

        :return: 0 when the loop over all domains finished, 1 when an
                 unexpected error aborted the worker
        """

        try:
            self.write_to_file(
                BColor.process("Process %s running, need work %s domains" %
                               (self.number, len(self.domains))))

            added_domains = 0
            # Matches any run of whitespace; used to strip it from each field.
            re_prefix = re.compile(r'\s*')
            self._connect_mysql()
            cursor = self.connection.cursor(MySQLdb.cursors.DictCursor)
            #   rpki = RpkiChecker()

            for domain_data in self.domains:
                try:
                    data = domain_data['line'].split("\t")

                    domain = re.sub(re_prefix, '', data[0])
                    delegated = re.sub(re_prefix, '', data[5])

                    if delegated == '1':
                        delegated = 'Y'
                        domain_dns_data_array = self.get_ns_record(domain)
                        as_array = self._get_asn_array(domain_dns_data_array)
                        # RPKI validation is switched off; the code is kept
                        # for reference and a fixed status is stored instead.
                        # try:
                        #     status = rpki.check_ip(domain_dns_data_array['a'][0], as_array[0])
                        #     rpki_status = status['code']
                        # except:
                        #     rpki_status = -2

                        rpki_status = -2
                    else:
                        # Not delegated: nothing to resolve.
                        delegated = 'N'
                        domain_dns_data_array = {}
                        as_array = {}
                        rpki_status = -2

                    register_info = {
                        'registrant': re.sub(re_prefix, '', data[1]),
                        'register_date': re.sub(re_prefix, '', data[2]),
                        'register_end_date': re.sub(re_prefix, '', data[3]),
                        'free_date': re.sub(re_prefix, '', data[4]),
                        'delegated': delegated,
                        'domain': domain,
                        'prefix': domain_data['prefix']
                    }

                    # Bind the domain as a query parameter instead of
                    # formatting it into the SQL text: it comes straight from
                    # the input line, and a quote inside it must not be able
                    # to change the statement.
                    cursor.execute(
                        "SELECT id FROM domain WHERE domain_name = LOWER(%s)",
                        (domain,))
                    domain_id = cursor.fetchone()

                    if not domain_id:
                        run_sql = self._insert_domain(domain_dns_data_array,
                                                      as_array, register_info,
                                                      rpki_status, cursor)
                    else:
                        run_sql = self._update_domain(domain_dns_data_array,
                                                      as_array,
                                                      domain_id['id'],
                                                      register_info,
                                                      rpki_status, cursor)

                    # Drop "b'" prefixes and collapse doubled quotes in the
                    # generated statement.
                    # NOTE(review): presumably these are artefacts of bytes
                    # values formatted into the SQL text — confirm; the
                    # replacement is applied to the whole statement.
                    run_sql = run_sql.replace("b\'", '')
                    run_sql = run_sql.replace("\'\'", '\'')

                    self.write_to_file(run_sql + ";", sql=True)

                    try:
                        cursor.execute(run_sql)
                        self.connection.commit()
                    except Exception:
                        self.write_to_file(
                            BColor.error("MySQL exceptions (SQL %s)" %
                                         run_sql))
                        self.write_to_file(BColor.error(
                            traceback.format_exc()))

                        # Retry once on a fresh connection after a short
                        # pause; a second failure is handled by the
                        # per-domain handler below.
                        time.sleep(5)
                        self._connect_mysql()
                        cursor = self.connection.cursor(
                            MySQLdb.cursors.DictCursor)
                        cursor.execute(run_sql)
                        self.connection.commit()

                    added_domains += 1

                    if (added_domains % 1000) == 0:
                        self.write_to_file(
                            BColor.process(
                                "Thread %d success resolved %d domains" %
                                (self.number, added_domains),
                                pid=self.number))

                    # Drop the references to the per-domain objects.
                    # See http://habrahabr.ru/post/178637/
                    data = None
                    domain = None
                    delegated = None
                    domain_dns_data_array = None
                    as_array = None
                    register_info = None
                    domain_id = None
                    run_sql = None

                except Exception:

                    pprint.pprint(domain_data)

                    # Re-read the domain name from the raw line: the failure
                    # may have happened before ``domain`` was assigned.
                    data = domain_data['line'].split("\t")
                    domain = re.sub(re_prefix, '', data[0])

                    self.write_to_file(
                        BColor.error(
                            "Domain %s work failed process number %s" %
                            (domain, self.number)))
                    self.write_to_file(BColor.error(traceback.format_exc()))

            self.write_to_file(BColor.process("Process %s done " %
                                              self.number))
            self.connection.close()
            return 0

        except Exception:
            self.write_to_file(BColor.error("Process failed %s" % self.number))
            self.write_to_file(BColor.error(traceback.format_exc()))
            return 1
Пример #19
0
    def _normalization_delete_record(self):
        """
        Нормализация удаленных и вновь добавленных доменов. То есть если домен был удален и зарегистрирован,
        у него должна быть одна история
        :return:
        """
        cursor = self.connection.cursor(MySQLdb.cursors.DictCursor)

        if self.show_log:
            BColor.ok("Select deleted domain from domain_history")

        sql = """SELECT DISTINCT domain_id AS domain_id FROM domain_history
WHERE domain_id NOT IN (SELECT id FROM domain)"""
        cursor.execute(sql)
        data = cursor.fetchall()

        count_deleted_domain = len(data)
        current_domain = 0
        count_not_update = 0
        count_update = 0

        if self.show_log:
            BColor.ok("All deleted domain is %s" % count_deleted_domain)

        for row in data:
            if self.show_log:
                BColor.process("Current domain %s/%s" % (current_domain, count_deleted_domain))
                BColor.ok("Updated %s, not updated %s" % (count_update, count_not_update))

            sql = "SELECT DISTINCT domain_name FROM domain_history WHERE domain_id = %s" % (row['domain_id'])
            BColor.warning(sql)
            cursor.execute(sql)
            domain_history = cursor.fetchone()

            sql = "SELECT id FROM domain WHERE domain_name = '%s'" % (domain_history['domain_name'])
            BColor.warning(sql)
            cursor.execute(sql)
            domain = cursor.fetchone()

            if domain:
                if self.show_log:
                    BColor.warning("Domain %s (%s) has new domain_id = %s" % (domain_history['domain_name'],
                                                                              row['domain_id'],
                                                                              domain['id']))

                sql_update = "UPDATE domain_history SET domain_id = %s WHERE domain_id = %s" % (domain['id'],
                                                                                                row['domain_id'])
                cursor.execute(sql_update)
                count_update += 1
            else:
                count_not_update += 1

            current_domain += 1
Пример #20
0
    def start_load_and_resolver_domain(net_array,
                                       work_path,
                                       delete_old=True,
                                       count_thread=COUNT_THREAD,
                                       verbose=False,
                                       count_cycle=2,
                                       resolve_dns='127.0.0.1'):
        """
        Start the resolver processes.

        The lines of every ``<prefix>_domains`` file in ``work_path`` are
        distributed round-robin over ``count_thread * count_cycle`` chunks.
        One ``Resolver`` is started per chunk, and the function waits after
        every ``count_thread`` started workers.  Finally, when ``delete_old``
        is set, ``Resolver.delete_not_updated_today`` is called with the
        per-prefix line counts.

        :param net_array: unicode|list
        :type work_path: unicode
        :type delete_old: bool
        :type count_thread: int
        :type verbose: bool
        :type count_cycle: int
        :type resolve_dns: unicode
        :return: None (also when the wait is interrupted by the user, in
                 which case the cleanup step is skipped)
        """

        if verbose:
            log_path = os.path.abspath(os.path.join(work_path, 'log'))
            if not os.path.exists(log_path):
                os.makedirs(log_path)
        else:
            log_path = False

        # NOTE(review): 1728000 seconds is 20 days (2 days would be 172800)
        # — confirm the intended timeout.
        join_timeout = 1728000

        def join_all(processes):
            """Wait for every process; return False if the user interrupted."""
            for process in processes:
                try:
                    process.join(join_timeout)
                except KeyboardInterrupt:
                    BColor.warning("Interrupted by user")
                    return False
            return True

        count_array_data = count_thread * count_cycle
        data_for_process = [[] for _ in range(count_array_data)]

        counter_all = {}

        for prefix in PREFIX_LIST_ZONE.keys():
            BColor.process("Load prefix_list %s " % prefix)
            file_prefix = os.path.join(work_path, prefix + "_domains")

            # The context manager closes the zone file once it has been read.
            with open(file_prefix) as file_domain_data:
                BColor.process("Load file %s " % file_prefix)
                counter_all[prefix] = 0
                for line in file_domain_data:
                    chunk = data_for_process[counter_all[prefix] %
                                             count_array_data]
                    chunk.append({'line': line, 'prefix': prefix})
                    counter_all[prefix] += 1

            BColor.process("All load zone %s -  %s" %
                           (prefix, counter_all[prefix]))

        process_list = []
        for i in range(count_array_data):
            BColor.process("Start process to work %s %s" %
                           (i, len(data_for_process[i])))
            resolver = Resolver(i, data_for_process[i], resolve_dns, net_array,
                                log_path)
            resolver.daemon = True
            process_list.append(resolver)
            resolver.start()

            # Wait once exactly ``count_thread`` workers of the current batch
            # are running (i is zero-based, hence i + 1).
            if (i + 1) % count_thread == 0:
                BColor.process("Wait for threads finish...")
                if not join_all(process_list):
                    return
                process_list = []

        # Wait for whatever is left from an incomplete last batch.
        if not join_all(process_list):
            return

        if delete_old:
            Resolver.delete_not_updated_today(counter_all)
Пример #21
0
    def start_load_and_resolver_domain(net_array: SubnetTree.SubnetTree,
                                       work_path: str,
                                       delete_old: bool = True,
                                       count_thread: int = COUNT_THREAD,
                                       verbose: bool = False,
                                       resolve_dns: str = '127.0.0.1') -> None:
        """
        Start the resolver processes.

        Every line of the ``<prefix>_domains`` files in ``work_path`` is put
        on a shared queue.  ``count_thread`` ``Resolver`` processes and one
        ``Status`` process are started on that queue.  After the resolvers
        have been joined, the collected statistics are summed up and
        reported.  Finally, when ``delete_old`` is set,
        ``Resolver.delete_not_updated_today`` is called with the per-prefix
        line counts.

        :return: None (also when the wait is interrupted by the user, in
                 which case reporting and cleanup are skipped)
        """

        if verbose:
            log_path = os.path.abspath(os.path.join(work_path, 'log'))
            if not os.path.exists(log_path):
                os.makedirs(log_path)
        else:
            log_path = False

        # All domain lines from the files go into one shared queue.
        # Example of a line from a file (tab-separated):
        # 0--2.RU  REGRU-RU  15.06.2019  15.06.2020  16.07.2020  1
        # NOTE(review): the queue is filled before any worker is started, so
        # put() would block if the files hold more than MAX_DOMAIN_COUNT
        # lines — confirm the sizing.
        queue_data = multiprocessing.Queue(MAX_DOMAIN_COUNT)
        queue_statistic = multiprocessing.Queue(count_thread + 5)
        counter_all = {}

        for prefix in PREFIX_LIST_ZONE.keys():
            BColor.process("Load prefix_list %s " % prefix)
            file_prefix = os.path.join(work_path, prefix + "_domains")

            # The context manager closes the zone file once it has been read.
            with open(file_prefix) as file_domain_data:
                BColor.process("Load file %s " % file_prefix)
                counter_all[prefix] = 0
                for line in file_domain_data:
                    queue_data.put({'line': line, 'prefix': prefix})
                    counter_all[prefix] += 1

            BColor.process("All load zone %s -  %s" %
                           (prefix, counter_all[prefix]))

        # Start the domain parsing processes.
        start_time = datetime.now()
        registrant_mutex = multiprocessing.Lock()
        process_list = []

        dist_ip = multiprocessing.Manager().dict()
        for i in range(count_thread):
            resolver = Resolver(i, queue_data, resolve_dns, net_array,
                                log_path, registrant_mutex, queue_statistic,
                                dist_ip)
            resolver.daemon = True
            process_list.append(resolver)
            resolver.start()

        # Report the progress of the domain parsing.
        status_prefix = os.path.join(work_path, "status_parse_domain")
        process_status = Status(queue_data, status_prefix)
        process_status.daemon = True
        process_status.start()

        for process in process_list:
            try:
                # NOTE(review): 1728000 seconds is 20 days (2 days would be
                # 172800) — confirm the intended timeout.
                process.join(1728000)
            except KeyboardInterrupt:
                BColor.warning("Interrupted by user")
                return

        process_status.join(10)

        queue_data.close()
        # total_seconds() covers the whole interval; ``.seconds`` alone would
        # drop whole days from runs longer than 24 hours.
        elapsed_seconds = (datetime.now() - start_time).total_seconds()

        all_time = 0
        all_count = 0
        while not queue_statistic.empty():
            statistic_data = queue_statistic.get()
            all_time += statistic_data['time_diff']
            all_count += statistic_data['count']

        # With no collected statistics (all_count == 0) the divisions raise
        # ZeroDivisionError, so the cleanup step below is not reached.
        performance_per_process = all_time / all_count
        performance = elapsed_seconds / all_count
        BColor.process(
            "Performance %f per process, total time %i per process, total count %i, performance %f, all time %i"
            % (performance_per_process, all_time, all_count, performance,
               elapsed_seconds))

        # After all domains are processed, delete those not updated today.
        if delete_old:
            Resolver.delete_not_updated_today(counter_all)
Пример #22
0
    def delete_not_updated_today(count_all_domain=False):
        """
        Delete the domains that were not loaded today and reset the flag.

        Without ``count_all_domain`` every row with ``load_today = 'N'`` is
        deleted and ``load_today`` is then set to ``'N'`` for the whole
        table.  With a ``{tld: number of domains in the zone file}`` dict the
        same is done per TLD, but only when the table holds at least as many
        rows for that TLD as the file; otherwise the TLD is skipped and an
        error is logged.

        :type count_all_domain: bool|dict
        :return: None
        """
        connection = get_mysql_connection()
        try:
            cursor = connection.cursor(MySQLdb.cursors.DictCursor)
            # NOTE(review): @TRIGGER_DISABLED is set only around the flag
            # reset, presumably so the table triggers ignore that bulk
            # UPDATE — confirm against the trigger definitions.
            sql_trigger_enable = "SET @TRIGGER_DISABLED = 0"
            sql_trigger_disable = "SET @TRIGGER_DISABLED = 1"

            if not count_all_domain:
                sql = "DELETE FROM domain WHERE load_today = 'N'"
                BColor.process(sql)
                cursor.execute(sql)
                cursor.execute(sql_trigger_disable)

                sql = "UPDATE domain SET load_today = 'N'"
                BColor.process(sql)
                cursor.execute(sql)
                cursor.execute(sql_trigger_enable)
            else:
                # items() exists on both Python 2 and 3 (iteritems() is
                # Python 2 only).
                for key_tld, tld_count_in_file in count_all_domain.items():
                    cursor.execute("SELECT count(*) as domain_count FROM domain WHERE tld = '%s'" % str(key_tld))
                    count_in_base = cursor.fetchone()
                    BColor.process("Count zone (%s) in file %s, in base %s"
                                   % (str(key_tld), str(tld_count_in_file), str(count_in_base['domain_count'])))

                    if count_in_base and int(count_in_base['domain_count']) >= int(tld_count_in_file):
                        sql = "DELETE FROM domain WHERE load_today = 'N' AND tld = '%s'" % str(key_tld)
                        BColor.process(sql)
                        cursor.execute(sql)
                        cursor.execute(sql_trigger_disable)

                        sql = "UPDATE domain SET load_today = 'N' WHERE tld = '%s'" % str(key_tld)
                        BColor.process(sql)
                        cursor.execute(sql)
                        cursor.execute(sql_trigger_enable)
                    else:
                        # Arguments follow the order named in the message:
                        # file count first, then the count from the table.
                        BColor.error("TLD %s - count in file %s, count in base %s"
                                     % (str(key_tld), str(tld_count_in_file),
                                        str(count_in_base['domain_count'])))
            connection.commit()
        finally:
            # Close the connection even when one of the statements fails.
            connection.close()
Пример #23
0
import traceback
from helpers.helpers import check_prog_run
from classes.resolver import Resolver
from helpers.helpersCollor import BColor


def print_log(log_flag, text):
    """
    Print a message to the console when logging is enabled.

    :type log_flag: bool
    :type text: unicode
    :return: None
    """
    if log_flag:
        # The parenthesised single-argument form is valid both as the
        # Python 2 print statement and as the Python 3 print function.
        print(text)


if __name__ == "__main__":
    # Script entry point: refuse to run twice in parallel, then delete the
    # domains that were not updated today.
    show_log = True
    try:
        if check_prog_run(PROGRAM_NAME):
            BColor.error("Program %s already running" % PROGRAM_NAME)
            sys.exit(1)

        Resolver.delete_not_updated_today()

    except Exception as e:
        # Format the exception object itself (``e.message`` does not exist
        # on Python 3) and use the call form of print, which is valid on
        # both Python 2 and Python 3.
        BColor.error("Got an exception: %s" % e)
        print(traceback.format_exc())
Пример #24
0
    def start_load_and_resolver_domain(net_array, work_path, delete_old=True, count=COUNT_THREAD, verbose=False,
                                       count_cycle=10, resolve_dns='127.0.0.1'):
        """
        Start the resolver processes.

        The lines of every ``<prefix>_domains`` file in ``work_path`` are
        distributed round-robin over ``count * count_cycle`` chunks.  One
        ``Resolver`` is started per chunk, and the function waits after every
        ``count`` started workers.  Finally, when ``delete_old`` is set,
        ``Resolver.delete_not_updated_today`` is called with the per-prefix
        line counts.

        :param net_array: unicode|list
        :type work_path: unicode
        :type delete_old: bool
        :type count: int
        :type verbose: bool
        :type count_cycle: int
        :type resolve_dns: unicode
        :return: None (also when the wait is interrupted by the user, in
                 which case the cleanup step is skipped)
        """

        if verbose:
            log_path = os.path.abspath(os.path.join(work_path, 'log'))
            if not os.path.exists(log_path):
                os.makedirs(log_path)
        else:
            log_path = False

        # NOTE(review): 1728000 seconds is 20 days (2 days would be 172800)
        # — confirm the intended timeout.
        join_timeout = 1728000

        def join_all(processes):
            """Wait for every process; return False if the user interrupted."""
            for process in processes:
                try:
                    process.join(join_timeout)
                except KeyboardInterrupt:
                    BColor.warning("Interrupted by user")
                    return False
            return True

        count_array_data = count * count_cycle
        data_for_process = [[] for _ in range(count_array_data)]

        counter_all = {}

        for prefix in PREFIX_LIST:
            BColor.process("Load prefix_list %s " % prefix)
            file_prefix = os.path.join(work_path, prefix+"_domains")

            # The context manager closes the zone file once it has been read.
            with open(file_prefix) as file_rib_data:
                BColor.process("Load file %s " % file_prefix)
                counter_all[prefix] = 0
                for line in file_rib_data:
                    chunk = data_for_process[counter_all[prefix] % count_array_data]
                    chunk.append({'line': line, 'prefix': prefix})
                    counter_all[prefix] += 1

            BColor.process("All load zone %s -  %s" % (prefix, counter_all[prefix]))

        process_list = []
        for i in range(count_array_data):
            BColor.process("Start process to work %s %s" % (i, len(data_for_process[i])))
            resolver = Resolver(i,  data_for_process[i], resolve_dns, net_array, log_path)
            resolver.daemon = True
            process_list.append(resolver)
            resolver.start()

            # Wait once exactly ``count`` workers of the current batch are
            # running (i is zero-based, hence i + 1).
            if (i + 1) % count == 0:
                BColor.process("Wait for threads finish...")
                if not join_all(process_list):
                    return
                process_list = []

        # Wait for whatever is left from an incomplete last batch.
        if not join_all(process_list):
            return

        if delete_old:
            Resolver.delete_not_updated_today(counter_all)
# Path of the "<PROGRAM_NAME>.debug" file inside CURRENT_DIR.
# NOTE(review): CURRENT_DIR, PROGRAM_NAME and os are defined outside this
# fragment — confirm they are in scope here.
logfile = os.path.join(CURRENT_DIR, '%s.debug' % PROGRAM_NAME)

import traceback
from helpers.helpers import check_prog_run
from classes.resolver import Resolver
from helpers.helpersCollor import BColor


def print_log(log_flag, text):
    """
    Print a message to the console when logging is enabled.

    :type log_flag: bool
    :type text: unicode
    :return: None
    """
    # Guard clause: stay silent when logging is switched off.
    if not log_flag:
        return
    print(text)

if __name__ == "__main__":
    # Script entry point: refuse to run twice in parallel, then delete the
    # domains that were not updated today.
    show_log = True
    try:
        if check_prog_run(PROGRAM_NAME):
            BColor.error("Program %s already running" % PROGRAM_NAME)
            sys.exit(1)

        Resolver.delete_not_updated_today()

    except Exception as e:
        # Exceptions have no ``message`` attribute on Python 3, so format the
        # exception object itself; otherwise this handler would raise
        # AttributeError and hide the real error.
        BColor.error("Got an exception: %s" % e)
        print(traceback.format_exc())