Example #1
 def send_request(self):
     logger.info('Sending request.')
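     # sendRequest() uploads the request XML together with its detached .p7s signature; both paths come from the config.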
     request = self.session.sendRequest(self.cfg.XMLPathFName(),
                                        self.cfg.P7SPathFName())
     logger.info('Checking request status.')
     if request['result']:
         self.code = request['code']
         logger.info('Got code %s', self.code)
         Dump.update(value=self.code).where(
             Dump.param == 'lastCode').execute()
         Dump.update(value='sendRequest').where(
             Dump.param == 'lastAction').execute()
         Dump.update(value='Code').where(
             Dump.param == 'lastResult').execute()
         logger.info('Save code in History')
         History.create(requestCode=self.code,
                        date=datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
         self.code_id = History.get(History.requestCode == self.code).id
         return self.code
     else:
         Dump.update(value='sendRequest').where(
             Dump.param == 'lastAction').execute()
         Dump.update(value='Error').where(
             Dump.param == 'lastResult').execute()
         logger.error(request['resultComment'])
         return False
Example #2
 def get_request(self):
     path_py = str(os.path.dirname(os.path.abspath(__file__)))
     logger.info('Waiting for 90 sec.')
     time.sleep(90)
     logger.info('Trying to get result...')
     request = self.session.getResult(self.code)
     Dump.update(value='getRequest').where(
         Dump.param == 'lastAction').execute()
     max_count = self.cfg.GetResultMaxCount()
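     # Poll for the result up to max_count times, sleeping a minute between unsuccessful attempts.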
     for count in range(1, max_count + 1):
         if request['result']:
             logger.info('Got a dump ver. %s for the %s (INN %s)',
                         request['dumpFormatVersion'],
                         request['operatorName'], request['inn'])
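             # The register comes base64-encoded; decode it and save it as result.zip next to this script.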
             with open(path_py + '/result.zip', "wb") as f:
                 f.write(b64decode(request['registerZipArchive']))
             with open(path_py + '/result.zip', 'rb') as f:
                 md5_sum = hashlib.md5(f.read()).hexdigest()
             logger.info(
                 'Downloaded dump %d bytes, MD5 hashsum: %s',
                 os.path.getsize(path_py + '/result.zip'),
                 md5_sum)
             try:
                 logger.info('Unpacking.')
                 zip_file = zipfile.ZipFile(path_py + '/result.zip', 'r')
                 zip_file.extract('dump.xml', path_py + '/')
                 if self.cfg.DumpFileSave():
                     zip_file.extractall(
                         path_py + '/dumps/%s' %
                         datetime.now().strftime("%Y-%m-%d %H-%M-%S"))
                 zip_file.close()
             except zipfile.BadZipfile:
                 logger.error('Wrong file format.')
                 Dump.update(value='Error').where(
                     Dump.param == 'lastResult').execute()
                 return False
             Dump.update(value='Ok').where(
                 Dump.param == 'lastResult').execute()
             return True
         else:
             if not request['resultCode']:
                 logger.info(
                     'Not ready yet. Waiting for a minute. Attempt number %s',
                     count)
                 time.sleep(60)
                 # Request the result again; otherwise every retry would re-check the same stale response.
                 request = self.session.getResult(self.code)
             else:
                 logger.error('Got an error, code %d: %s',
                              request['resultCode'],
                              request['resultComment'])
                 Dump.update(value='Error').where(
                     Dump.param == 'lastResult').execute()
                 return False
     Dump.update(value='Error').where(Dump.param == 'lastResult').execute()
     # History.update(dump=False).where(History.id == self.code_id).execute()
     logger.info("Can't get result.")
     return False
Example #3
 def get_request(self):
     path_py = str(os.path.dirname(os.path.abspath(__file__)))
     logger.info('Waiting for 90 sec.')
     time.sleep(90)
     logger.info('Trying to get result...')
     request = self.session.getResult(self.code)
     Dump.update(value='getRequest').where(Dump.param == 'lastAction').execute()
     max_count = self.cfg.GetResultMaxCount()
     for count in range(1, max_count + 1):
         if request['result']:
             logger.info('Got a dump ver. %s for the %s (INN %s)',
                         request['dumpFormatVersion'],
                         request['operatorName'],
                         request['inn'])
             with open(path_py + '/result.zip', "wb") as f:
                 f.write(b64decode(request['registerZipArchive']))
             with open(path_py + '/result.zip', 'rb') as f:
                 md5_sum = hashlib.md5(f.read()).hexdigest()
             logger.info('Downloaded dump %d bytes, MD5 hashsum: %s',
                         os.path.getsize(path_py + '/result.zip'),
                         md5_sum)
             try:
                 logger.info('Unpacking.')
                 zip_file = zipfile.ZipFile(path_py + '/result.zip', 'r')
                 zip_file.extract('dump.xml', path_py + '/')
                 if self.cfg.DumpFileSave():
                     zip_file.extractall(path_py + '/dumps/%s' % datetime.now().strftime("%Y-%m-%d %H-%M-%S"))
                 zip_file.close()
             except zipfile.BadZipfile:
                 logger.error('Wrong file format.')
                 Dump.update(value='Error').where(Dump.param == 'lastResult').execute()
                 return False
             Dump.update(value='Ok').where(Dump.param == 'lastResult').execute()
             return True
         else:
             if not request['resultCode']:
                 logger.info('Not ready yet. Waiting for a minute. Attempt number %s', count)
                 time.sleep(60)
                 # Request the result again; otherwise every retry would re-check the same stale response.
                 request = self.session.getResult(self.code)
             else:
                 logger.error('Got an error, code %d: %s',
                              request['resultCode'],
                              request['resultComment'])
                 Dump.update(value='Error').where(Dump.param == 'lastResult').execute()
                 return False
     Dump.update(value='Error').where(Dump.param == 'lastResult').execute()
     logger.info("Can't get result.")
     return False
Example #4
 def send_request(self):
     logger.info('Sending request.')
     request = self.session.sendRequest(self.cfg.XMLPathFName(), self.cfg.P7SPathFName())
     logger.info('Checking request status.')
     if request['result']:
         self.code = request['code']
         logger.info('Got code %s', self.code)
         Dump.update(value=self.code).where(Dump.param == 'lastCode').execute()
         Dump.update(value='sendRequest').where(Dump.param == 'lastAction').execute()
         Dump.update(value='Code').where(Dump.param == 'lastResult').execute()
         logger.info('Save code in History')
         History.create(requestCode=self.code, date=datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
         self.code_id = History.get(History.requestCode == self.code).id
         return self.code
     else:
         Dump.update(value='sendRequest').where(Dump.param == 'lastAction').execute()
         Dump.update(value='Error').where(Dump.param == 'lastResult').execute()
         logger.error(request['resultComment'])
         return False
Example #5
def run():
    log.info('Start load genres.')

    genre_translate = load()
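    # load() presumably reads the saved genre -> translation mapping back from FILE_NAME_GENRE_TRANSLATE.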

    NEED_SMS = True
    new_genres = []
    is_first_run = not genre_translate

    log.info(f'Current genres: {len(genre_translate)}')

    for genre in Dump.get_all_genres():
        if genre not in genre_translate:
            log.info(f'Added new genre: {genre!r}')
            genre_translate[genre] = None
            new_genres.append(genre)

    if new_genres:
        text = f"Added genres ({len(new_genres)}): {', '.join(new_genres)}"
        log.info(text)

        # If this is the first run, don't send an SMS
        if not is_first_run:
            if NEED_SMS:
                send_sms(text, log=log)

        log.info('Save genres')

        with open(FILE_NAME_GENRE_TRANSLATE, 'w', encoding='utf-8') as f:
            json.dump(
                genre_translate,
                f,
                ensure_ascii=False,
                indent=4
            )

    else:
        log.info('No new genres')

    log.info('Finish!')
Example #6
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

__author__ = 'ipetrash'

from collections import defaultdict
from db import Dump

print('Total:', Dump.select().count())

genres = Dump.get_all_genres()
print(f'Genres ({len(genres)}): {genres}')

games = Dump.get_all_games()
print(f'Games ({len(games)}): {games}')

sites = Dump.get_all_sites()
print(f'Sites ({len(sites)}): {sites}')

print()

max_width = max(len(x.site) for x in Dump.select(Dump.site).distinct())
fmt_str = '    {:<%d} : {}' % max_width

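# Group the dump rows by game name.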
game_by_dump = defaultdict(list)
for x in Dump.select():
    game_by_dump[x.name].append(x)

for game, dumps in game_by_dump.items():
    print(game)
Example #7
 def check_new_dump(self):
     logger.info('Check if dump.xml has updates since last sync.')
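     # The web service reports dump dates in milliseconds; // 1000 converts them to the Unix seconds stored in the Dump table.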
     last_date_dump = max(self.update_dump.lastDumpDate // 1000, self.update_dump.lastDumpDateUrgently // 1000)
     current_date_dump = max(int(Dump.get(Dump.param == 'lastDumpDate').value),
                             int(Dump.get(Dump.param == 'lastDumpDateUrgently').value))
     logger.info('Current date: lastDumpDate: %s, lastDumpDateUrgently: %s',
                 datetime.fromtimestamp(int(Dump.get(Dump.param == 'lastDumpDate').value))
                 .strftime('%Y-%m-%d %H:%M:%S'),
                 datetime.fromtimestamp(int(Dump.get(Dump.param == 'lastDumpDateUrgently').value))
                 .strftime('%Y-%m-%d %H:%M:%S'))
     logger.info('Last date: lastDumpDate: %s, lastDumpDateUrgently: %s',
                 datetime.fromtimestamp(int(self.update_dump.lastDumpDate // 1000)).strftime('%Y-%m-%d %H:%M:%S'),
                 datetime.fromtimestamp(int(self.update_dump.lastDumpDateUrgently // 1000))
                 .strftime('%Y-%m-%d %H:%M:%S'))
     if last_date_dump != current_date_dump or Dump.get(Dump.param == 'lastResult').value == 'Error':
         logger.info('New dump is available.')
         # Dump.update(value=last_dump.lastDumpDate // 1000).where(Dump.param == 'lastDumpDate').execute()
         # Dump.update(value=last_dump.lastDumpDateUrgently // 1000) \
         #     .where(Dump.param == 'lastDumpDateUrgently').execute()
         Dump.update(value='getLastDumpDate').where(Dump.param == 'lastAction').execute()
         Dump.update(value='NewDump').where(Dump.param == 'lastResult').execute()
         return True
     else:
         logger.info('Dump date without changes.')
         Dump.update(value='getLastDumpDate').where(Dump.param == 'lastAction').execute()
         Dump.update(value='lastDump').where(Dump.param == 'lastResult').execute()
         return False
Example #8
    def check_service_upd(self):
        msg = ''

        logger.info('Current versions: webservice: %s, dump: %s, doc: %s',
                    Dump.get(Dump.param == 'webServiceVersion').value,
                    Dump.get(Dump.param == 'dumpFormatVersion').value,
                    Dump.get(Dump.param == 'docVersion').value)
        if self.update_dump.webServiceVersion != Dump.get(Dump.param == 'webServiceVersion').value:
            logger.warning('New webservice: %s', self.update_dump.webServiceVersion)
            msg = msg + 'Current webservice: ' + Dump.get(Dump.param == 'webServiceVersion').value + \
                        '\nNew webservice: ' + self.update_dump.webServiceVersion + '\n\n'
            Dump.update(value=self.update_dump.webServiceVersion).where(Dump.param == 'webServiceVersion').execute()

        if self.update_dump.dumpFormatVersion != Dump.get(Dump.param == 'dumpFormatVersion').value:
            logger.warning('New dumpFormatVersion: %s', self.update_dump.dumpFormatVersion)
            msg = msg + 'Current dumpFormatVersion: ' + Dump.get(Dump.param == 'dumpFormatVersion').value + \
                        '\nNew dumpFormatVersion: ' + self.update_dump.dumpFormatVersion + '\n\n'
            Dump.update(value=self.update_dump.dumpFormatVersion).where(Dump.param == 'dumpFormatVersion').execute()

        if self.update_dump.docVersion != Dump.get(Dump.param == 'docVersion').value:
            logger.warning('New docVersion: %s', self.update_dump.docVersion)
            msg = msg + 'Current docVersion: ' + Dump.get(Dump.param == 'docVersion').value + '\nNew docVersion: ' + \
                        self.update_dump.docVersion + '\n\n'
            Dump.update(value=self.update_dump.docVersion).where(Dump.param == 'docVersion').execute()
        # print(msg)
        return msg
Example #9
    def parse_dump(self):
        if not os.path.exists(self.path_py + '/dump.xml'):
            logger.info('dump.xml not found: %s', self.path_py + '/dump.xml')
            return 0
        logger.info('dump.xml already exists.')
        tree_xml = ElementTree().parse(self.path_py + '/dump.xml')

        dt = datetime.strptime(tree_xml.attrib['updateTime'][:19], '%Y-%m-%dT%H:%M:%S')
        update_time = int(time.mktime(dt.timetuple()))
        Dump.update(value=update_time).where(Dump.param == 'lastDumpDate').execute()
        logger.info('Got updateTime: %s.', update_time)

        dt = datetime.strptime(tree_xml.attrib['updateTimeUrgently'][:19], '%Y-%m-%dT%H:%M:%S')
        update_time_urgently = int(time.mktime(dt.timetuple()))
        Dump.update(value=update_time_urgently).where(Dump.param == 'lastDumpDateUrgently').execute()
        logger.info('Got updateTimeUrgently: %s.', update_time_urgently)

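        # Every register entry carries an id attribute; findall collects them all from the parsed dump.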
        list_xml = tree_xml.findall(".//*[@id]")
        id_set_dump = set()
        id_set_db = set()
        for content_xml in list_xml:
            # print(content_xml.tag, content_xml.attrib, content_xml.text)
            id_set_dump.add(int(content_xml.attrib['id']))

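        # "Item.purge >> None" is peewee's IS NULL test: only rows that have not been purged count as live.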
        select_content_id_db = Item.select(Item.content_id).where(Item.purge >> None)
        for content_db in select_content_id_db:
            id_set_db.add(content_db.content_id)

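        # Plain set arithmetic gives the diff: ids present only in the DB are deletions, ids present only in the dump are additions.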
        common_id_set = id_set_dump.intersection(id_set_db)
        delete_id_set = id_set_db.difference(common_id_set)
        add_id_set = id_set_dump.difference(common_id_set)
        # print(delete_id_set)
        # print(add_id_set)

        if len(delete_id_set) > 0:
            with self.transact.atomic():
                for del_item in delete_id_set:
                    logger.info('Full delete Item, IP, Domain, URL id: %s.', del_item)

                    Item.update(purge=self.code_id).where(Item.content_id == del_item, Item.purge >> None).execute()
                    Domain.update(purge=self.code_id).where(Domain.content_id == del_item,
                                                            Domain.purge >> None).execute()
                    URL.update(purge=self.code_id).where(URL.content_id == del_item, URL.purge >> None).execute()
                    IP.update(purge=self.code_id).where(IP.content_id == del_item, IP.purge >> None).execute()

        if len(add_id_set) > 0:
            include_time = str()
            urgency_type = int()
            entry_type = int()
            block_type = str()
            hash_value = str()
            with self.transact.atomic():
                for new_item in add_id_set:
                    logger.info('New Item, IP, Domain, URL id: %s.', new_item)
                    new_item_xml = tree_xml.find(".//content[@id='" + str(new_item) + "']")
                    for data_xml in new_item_xml.iter():
                        if data_xml.tag == 'content':
                            content_id = int(data_xml.attrib['id'])
                            try:
                                urgency_type = int(data_xml.attrib['urgencyType'])
                            except KeyError:
                                urgency_type = 0
                            include_time = self.date_time_xml_to_db(data_xml.attrib['includeTime'])
                            try:
                                block_type = data_xml.attrib['blockType']
                            except KeyError:
                                block_type = 'default'
                            entry_type = int(data_xml.attrib['entryType'])
                            hash_value = data_xml.attrib['hash']
                        if data_xml.tag == 'decision':
                            decision_date = data_xml.attrib['date']
                            decision_number = data_xml.attrib['number']
                            decision_org = data_xml.attrib['org']
                            item_new = Item(content_id=content_id, includeTime=include_time,
                                            urgencyType=urgency_type, entryType=entry_type, blockType=block_type,
                                            hashRecord=hash_value, decision_date=decision_date,
                                            decision_num=decision_number, decision_org=decision_org,
                                            add=self.code_id)
                            item_new.save()
                        if data_xml.tag == 'url':
                            if not self.only_ascii(data_xml.text):
                                url_split = str(data_xml.text).split(':')
                                url = url_split[0] + ':' + urllib.parse.quote(url_split[1])
                            else:
                                url = data_xml.text
                            URL.create(item=item_new.id, content_id=content_id, url=url, add=self.code_id)
                        if data_xml.tag == 'domain':
                            if not self.only_ascii(data_xml.text):
                                domain = (str(data_xml.text).encode('idna')).decode()
                            else:
                                domain = data_xml.text
                            Domain.create(item=item_new.id, content_id=content_id, domain=domain, add=self.code_id)
                        if data_xml.tag == 'ip':
                            ip = data_xml.text
                            IP.create(item=item_new.id, content_id=content_id, ip=ip, add=self.code_id)
                        if data_xml.tag == 'ipSubnet':
                            net = data_xml.text.split('/')
                            ip = net[0]
                            mask = net[1]
                            IP.create(item=item_new.id, content_id=content_id, ip=ip, mask=mask, add=self.code_id)

        url_db_set = set()
        url_xml_set = set()
        ip_db_set = set()
        ip_xml_set = set()
        sub_ip_xml_set = set()
        sub_ip_db_set = set()
        domain_db_set = set()
        domain_xml_set = set()
        data_update = False
        with self.transact.atomic():
            for item_xml in list_xml:
                for data_xml in item_xml.iter():
                    # print(data_xml.tag, data_xml.attrib, data_xml.text)
                    if data_xml.tag == 'content':
                        content_id = int(data_xml.attrib['id'])
                        hash_value = data_xml.attrib['hash']
                        item_db = Item.get(Item.content_id == content_id, Item.purge >> None)

                        if hash_value != item_db.hashRecord:
                            logger.info('Hashes not equal, update hash id: %s', content_id)
                            try:
                                urgency_type = int(data_xml.attrib['urgencyType'])
                            except KeyError:
                                urgency_type = 0
                            include_time = self.date_time_xml_to_db(data_xml.attrib['includeTime'])
                            try:
                                block_type = data_xml.attrib['blockType']
                            except KeyError:
                                block_type = 'default'
                            entry_type = int(data_xml.attrib['entryType'])
                            item_db.hashRecord = hash_value
                            # Item.update(purge=None).where(Item.content_id == content_id).execute()
                            data_update = True
                        else:
                            data_update = False
                            break

                    if data_xml.tag == 'decision':
                        decision_date = data_xml.attrib['date']
                        decision_number = data_xml.attrib['number']
                        decision_org = data_xml.attrib['org']
                        # print(item_db)
                        if str(item_db.includeTime) != include_time:
                            logger.info('content_id: %s.', content_id)
                            logger.info('XML includeTime: %s.', include_time)
                            logger.info('DB includeTime: %s.', item_db.includeTime)
                            item_db.includeTime = include_time
                            # Item.update(includeTime=include_time).where(Item.content_id == content_id,
                            #                                             Item.purge >> None).execute()
                        if item_db.urgencyType != urgency_type:
                            logger.info('content_id: %s.', content_id)
                            logger.info('XML urgencyType: %s.', urgency_type)
                            logger.info('DB urgencyType: %s.', item_db.urgencyType)
                            item_db.urgencyType = urgency_type
                            # Item.update(urgencyType=urgency_type).where(Item.content_id == content_id,
                            #                                             Item.purge >> None).execute()
                        if item_db.blockType != block_type:
                            logger.info('content_id: %s.', content_id)
                            logger.info('XML blockType: %s.', block_type)
                            logger.info('DB blockType: %s.', item_db.blockType)
                            item_db.blockType = block_type
                            # Item.update(blockType=block_type).where(Item.content_id == content_id,
                            #                                         Item.purge >> None).execute()
                        if item_db.entryType != entry_type:
                            logger.info('content_id: %s.', content_id)
                            logger.info('XML entryType: %s.', entry_type)
                            logger.info('DB entryType: %s.', item_db.entryType)
                            item_db.entryType = entry_type
                            # Item.update(entryType=entry_type).where(Item.content_id == content_id,
                            #                                         Item.purge >> None).execute()
                        if str(item_db.decision_date) != decision_date:
                            logger.info('content_id: %s.', content_id)
                            logger.info('XML date: %s.', decision_date)
                            logger.info('DB date: %s.', str(item_db.decision_date))
                            item_db.decision_date = decision_date
                            # Item.update(decision_date=decision_date).where(Item.content_id == content_id,
                            #                                                Item.purge >> None).execute()
                        if item_db.decision_num != decision_number:
                            logger.info('content_id: %s.', content_id)
                            logger.info('XML number: %s.', decision_number)
                            logger.info('DB number: %s.', item_db.decision_num)
                            item_db.decision_num = decision_number
                            # Item.update(decision_num=decision_number).where(Item.content_id == content_id,
                            #                                                 Item.purge >> None).execute()
                        if item_db.decision_org != decision_org:
                            logger.info('content_id: %s.', content_id)
                            logger.info('XML org: %s.', decision_org)
                            logger.info('DB org: %s.', item_db.decision_org)
                            item_db.decision_org = decision_org
                            # Item.update(decision_org=decision_org).where(Item.content_id == content_id,
                            #                                              Item.purge >> None).execute()

                    if data_xml.tag == 'url':
                        if not self.only_ascii(data_xml.text):
                            url_split = str(data_xml.text).split(':')
                            url = url_split[0] + ':' + urllib.parse.quote(url_split[1])
                        else:
                            url = data_xml.text
                        url_xml_set.add(url)

                    if data_xml.tag == 'domain':
                        if not self.only_ascii(data_xml.text):
                            domain = (str(data_xml.text).encode('idna')).decode()
                        else:
                            domain = data_xml.text
                        domain_xml_set.add(domain)

                    if data_xml.tag == 'ip':
                        ip_xml_set.add(data_xml.text)

                    if data_xml.tag == 'ipSubnet':
                        sub_ip_xml_set.add(data_xml.text)

                if data_update:
                    url_db = URL.select().where(URL.item == item_db.id, URL.purge >> None)

                    for url_item in url_db:
                        url_db_set.add(url_item.url)
                    if url_db_set != url_xml_set:
                        common_url_set = url_xml_set.intersection(url_db_set)
                        delete_url_set = url_db_set.difference(common_url_set)
                        add_url_set = url_xml_set.difference(common_url_set)
                        if len(delete_url_set) > 0:
                            logger.info('Delete id %s URL: %s', content_id, delete_url_set)
                            for delete_url in delete_url_set:
                                URL.update(purge=self.code_id).where(URL.item == item_db.id, URL.url == delete_url,
                                                                     URL.purge >> None).execute()
                        if len(add_url_set) > 0:
                            logger.info('Add id %s URL: %s', content_id, add_url_set)
                            for add_url in add_url_set:
                                URL.create(item=item_db.id, content_id=item_db.content_id, url=add_url,
                                           add=self.code_id)
                    url_db_set.clear()
                    url_xml_set.clear()

                    domain_db = Domain.select().where(Domain.item == item_db.id, Domain.purge >> None)

                    for domain_item in domain_db:
                        domain_db_set.add(domain_item.domain)
                    if domain_db_set != domain_xml_set:
                        common_domain_set = domain_xml_set.intersection(domain_db_set)
                        delete_domain_set = domain_db_set.difference(common_domain_set)
                        add_domain_set = domain_xml_set.difference(common_domain_set)
                        if len(delete_domain_set) > 0:
                            logger.info('Delete id %s Domain: %s', content_id, delete_domain_set)
                            for delete_domain in delete_domain_set:
                                Domain.update(purge=self.code_id).where(Domain.item == item_db.id,
                                                                        Domain.domain == delete_domain,
                                                                        Domain.purge >> None).execute()
                        if len(add_domain_set) > 0:
                            logger.info('Add id %s Domain: %s', content_id, add_domain_set)
                            for add_domain in add_domain_set:
                                Domain.create(item=item_db.id, content_id=item_db.content_id, domain=add_domain,
                                              add=self.code_id)
                    domain_db_set.clear()
                    domain_xml_set.clear()

                    ip_db = IP.select().where(IP.item == item_db.id, IP.mask == 32, IP.purge >> None)

                    for ip_item in ip_db:
                        ip_db_set.add(ip_item.ip)
                    if ip_db_set != ip_xml_set:
                        common_ip_set = ip_xml_set.intersection(ip_db_set)
                        delete_ip_set = ip_db_set.difference(common_ip_set)
                        add_ip_set = ip_xml_set.difference(common_ip_set)
                        if len(delete_ip_set) > 0:
                            logger.info('Delete id %s ip: %s', content_id, delete_ip_set)
                            for delete_ip in delete_ip_set:
                                IP.update(purge=self.code_id).where(IP.item == item_db.id, IP.ip == delete_ip,
                                                                    IP.mask == 32, IP.purge >> None).execute()
                        if len(add_ip_set) > 0:
                            logger.info('Add id %s ip: %s', content_id, add_ip_set)
                            for add_ip in add_ip_set:
                                IP.create(item=item_db.id, content_id=item_db.content_id, ip=add_ip,
                                          add=self.code_id)
                    ip_db_set.clear()
                    ip_xml_set.clear()

                    sub_ip_db = IP.select().where(IP.item == item_db.id, IP.mask < 32, IP.purge >> None)

                    for sub_ip_item in sub_ip_db:
                        sub_ip_db_set.add(str(sub_ip_item.ip) + '/' + str(sub_ip_item.mask))
                    if sub_ip_db_set != sub_ip_xml_set:
                        common_sub_ip_set = sub_ip_xml_set.intersection(sub_ip_db_set)
                        delete_sub_ip_set = sub_ip_db_set.difference(common_sub_ip_set)
                        add_sub_ip_set = sub_ip_xml_set.difference(common_sub_ip_set)
                        if len(delete_sub_ip_set) > 0:
                            logger.info('Delete id %s subnet: %s', content_id, delete_sub_ip_set)
                            for delete_sub_ip in delete_sub_ip_set:
                                del_subnet = str(delete_sub_ip).split('/')
                                del_ip = del_subnet[0]
                                del_mask = del_subnet[1]
                                IP.update(purge=self.code_id).where(IP.item == item_db.id, IP.ip == del_ip,
                                                                    IP.mask == del_mask, IP.purge >> None).execute()
                        if len(add_sub_ip_set) > 0:
                            logger.info('Add id %s subnet: %s', content_id, add_sub_ip_set)
                            for add_sub_ip in add_sub_ip_set:
                                add_subnet = str(add_sub_ip).split('/')
                                add_ip = add_subnet[0]
                                add_mask = add_subnet[1]
                                IP.create(item=item_db.id, content_id=item_db.content_id, ip=add_ip, mask=add_mask,
                                          add=self.code_id)
                    item_db.save()
                    sub_ip_db_set.clear()
                    sub_ip_xml_set.clear()

        if self.check_diff():
            self.cleaner()
            return 1
        else:
            logger.info('no updates')
            self.cleaner()
            return 2
Example #10
    def check_new_dump(self):
        logger.info('Check if dump.xml has updates since last sync.')

        if self.cfg.lastDumpDateUrgently() and not self.cfg.lastDumpDate():
            last_date_dump = self.update_dump.lastDumpDateUrgently // 1000
            current_date_dump = int(
                Dump.get(Dump.param == 'lastDumpDateUrgently').value)

        elif self.cfg.lastDumpDate() and not self.cfg.lastDumpDateUrgently():
            last_date_dump = self.update_dump.lastDumpDate // 1000
            current_date_dump = int(
                Dump.get(Dump.param == 'lastDumpDate').value)
        else:
            last_date_dump = max(self.update_dump.lastDumpDate // 1000,
                                 self.update_dump.lastDumpDateUrgently // 1000)
            current_date_dump = max(
                int(Dump.get(Dump.param == 'lastDumpDate').value),
                int(Dump.get(Dump.param == 'lastDumpDateUrgently').value))

        logger.info(
            'Current date: lastDumpDate: %s, lastDumpDateUrgently: %s',
            datetime.fromtimestamp(
                int(Dump.get(Dump.param == 'lastDumpDate').value)).strftime(
                    '%Y-%m-%d %H:%M:%S'),
            datetime.fromtimestamp(
                int(Dump.get(Dump.param == 'lastDumpDateUrgently').value)).
            strftime('%Y-%m-%d %H:%M:%S'))
        logger.info(
            'Last date: lastDumpDate: %s, lastDumpDateUrgently: %s',
            datetime.fromtimestamp(int(self.update_dump.lastDumpDate //
                                       1000)).strftime('%Y-%m-%d %H:%M:%S'),
            datetime.fromtimestamp(
                int(self.update_dump.lastDumpDateUrgently //
                    1000)).strftime('%Y-%m-%d %H:%M:%S'))
        if last_date_dump != current_date_dump or Dump.get(
                Dump.param == 'lastResult').value == 'Error':
            logger.info('New dump is available.')
            # Dump.update(value=last_dump.lastDumpDate // 1000).where(Dump.param == 'lastDumpDate').execute()
            # Dump.update(value=last_dump.lastDumpDateUrgently // 1000) \
            #     .where(Dump.param == 'lastDumpDateUrgently').execute()
            Dump.update(value='getLastDumpDate').where(
                Dump.param == 'lastAction').execute()
            Dump.update(value='NewDump').where(
                Dump.param == 'lastResult').execute()
            return True
        else:
            logger.info('Dump date without changes.')
            Dump.update(value='getLastDumpDate').where(
                Dump.param == 'lastAction').execute()
            Dump.update(value='lastDump').where(
                Dump.param == 'lastResult').execute()
            return False
Example #11
    def check_service_upd(self):
        msg = ''

        logger.info('Current versions: webservice: %s, dump: %s, doc: %s',
                    Dump.get(Dump.param == 'webServiceVersion').value,
                    Dump.get(Dump.param == 'dumpFormatVersion').value,
                    Dump.get(Dump.param == 'docVersion').value)
        if self.update_dump.webServiceVersion != Dump.get(
                Dump.param == 'webServiceVersion').value:
            logger.warning('New webservice: %s',
                           self.update_dump.webServiceVersion)
            msg = msg + 'Current webservice: ' + Dump.get(Dump.param == 'webServiceVersion').value + \
                        '\nNew webservice: ' + self.update_dump.webServiceVersion + '\n\n'
            Dump.update(value=self.update_dump.webServiceVersion).where(
                Dump.param == 'webServiceVersion').execute()

        if self.update_dump.dumpFormatVersion != Dump.get(
                Dump.param == 'dumpFormatVersion').value:
            logger.warning('New dumpFormatVersion: %s',
                           self.update_dump.dumpFormatVersion)
            msg = msg + 'Current dumpFormatVersion: ' + Dump.get(Dump.param == 'dumpFormatVersion').value + \
                        '\nNew dumpFormatVersion: ' + self.update_dump.dumpFormatVersion + '\n\n'
            Dump.update(value=self.update_dump.dumpFormatVersion).where(
                Dump.param == 'dumpFormatVersion').execute()

        if self.update_dump.docVersion != Dump.get(
                Dump.param == 'docVersion').value:
            logger.warning('New docVersion: %s', self.update_dump.docVersion)
            msg = msg + 'Current docVersion: ' + Dump.get(Dump.param == 'docVersion').value + '\nNew docVersion: ' + \
                        self.update_dump.docVersion + '\n\n'
            Dump.update(value=self.update_dump.docVersion).where(
                Dump.param == 'docVersion').execute()
        # print(msg)
        return msg
Example #12
    def parse_dump(self):
        if not os.path.exists(self.path_py + '/dump.xml'):
            logger.info('dump.xml not found: %s', self.path_py + '/dump.xml')
            return 0
        logger.info('dump.xml already exists.')
        tree_xml = ElementTree().parse(self.path_py + '/dump.xml')

        dt = datetime.strptime(tree_xml.attrib['updateTime'][:19],
                               '%Y-%m-%dT%H:%M:%S')
        update_time = int(time.mktime(dt.timetuple()))
        Dump.update(value=update_time).where(
            Dump.param == 'lastDumpDate').execute()
        logger.info('Got updateTime: %s.', update_time)

        dt = datetime.strptime(tree_xml.attrib['updateTimeUrgently'][:19],
                               '%Y-%m-%dT%H:%M:%S')
        update_time_urgently = int(time.mktime(dt.timetuple()))
        Dump.update(value=update_time_urgently).where(
            Dump.param == 'lastDumpDateUrgently').execute()
        logger.info('Got updateTimeUrgently: %s.', update_time_urgently)

        list_xml = tree_xml.findall(".//*[@id]")
        id_set_dump = set()
        id_set_db = set()
        for content_xml in list_xml:
            # print(content_xml.tag, content_xml.attrib, content_xml.text)
            id_set_dump.add(int(content_xml.attrib['id']))

        select_content_id_db = Item.select(
            Item.content_id).where(Item.purge >> None)
        for content_db in select_content_id_db:
            id_set_db.add(content_db.content_id)

        common_id_set = id_set_dump.intersection(id_set_db)
        delete_id_set = id_set_db.difference(common_id_set)
        add_id_set = id_set_dump.difference(common_id_set)
        # print(delete_id_set)
        # print(add_id_set)

        if len(delete_id_set) > 0:
            with self.transact.atomic():
                for del_item in delete_id_set:
                    logger.info('Full delete Item, IP, Domain, URL id: %s.',
                                del_item)

                    Item.update(purge=self.code_id).where(
                        Item.content_id == del_item,
                        Item.purge >> None).execute()
                    Domain.update(purge=self.code_id).where(
                        Domain.content_id == del_item,
                        Domain.purge >> None).execute()
                    URL.update(purge=self.code_id).where(
                        URL.content_id == del_item,
                        URL.purge >> None).execute()
                    IP.update(purge=self.code_id).where(
                        IP.content_id == del_item, IP.purge >> None).execute()

        if len(add_id_set) > 0:
            include_time = str()
            urgency_type = int()
            entry_type = int()
            block_type = str()
            hash_value = str()
            with self.transact.atomic():
                for new_item in add_id_set:
                    logger.info('New Item, IP, Domain, URL id: %s.', new_item)
                    new_item_xml = tree_xml.find(".//content[@id='" +
                                                 str(new_item) + "']")
                    for data_xml in new_item_xml.iter():
                        if data_xml.tag == 'content':
                            content_id = int(data_xml.attrib['id'])
                            try:
                                urgency_type = int(
                                    data_xml.attrib['urgencyType'])
                            except KeyError:
                                urgency_type = 0
                            include_time = self.date_time_xml_to_db(
                                data_xml.attrib['includeTime'])
                            try:
                                block_type = data_xml.attrib['blockType']
                            except KeyError:
                                block_type = 'default'
                            entry_type = int(data_xml.attrib['entryType'])
                            hash_value = data_xml.attrib['hash']
                        if data_xml.tag == 'decision':
                            decision_date = data_xml.attrib['date']
                            decision_number = data_xml.attrib['number']
                            decision_org = data_xml.attrib['org']
                            item_new = Item(content_id=content_id,
                                            includeTime=include_time,
                                            urgencyType=urgency_type,
                                            entryType=entry_type,
                                            blockType=block_type,
                                            hashRecord=hash_value,
                                            decision_date=decision_date,
                                            decision_num=decision_number,
                                            decision_org=decision_org,
                                            add=self.code_id)
                            item_new.save()
                        if data_xml.tag == 'url':
                            if not self.only_ascii(data_xml.text):
                                url_split = str(data_xml.text).split(':')
                                url = url_split[0] + ':' + urllib.parse.quote(
                                    url_split[1])
                            else:
                                url = data_xml.text
                            URL.create(item=item_new.id,
                                       content_id=content_id,
                                       url=url,
                                       add=self.code_id)
                        if data_xml.tag == 'domain':
                            if not self.only_ascii(data_xml.text):
                                domain = (str(
                                    data_xml.text).encode('idna')).decode()
                            else:
                                domain = data_xml.text
                            Domain.create(item=item_new.id,
                                          content_id=content_id,
                                          domain=domain,
                                          add=self.code_id)
                        if data_xml.tag == 'ip':
                            ip = data_xml.text
                            IP.create(item=item_new.id,
                                      content_id=content_id,
                                      ip=ip,
                                      add=self.code_id)
                        if data_xml.tag == 'ipSubnet':
                            net = data_xml.text.split('/')
                            ip = net[0]
                            mask = net[1]
                            IP.create(item=item_new.id,
                                      content_id=content_id,
                                      ip=ip,
                                      mask=mask,
                                      add=self.code_id)

        url_db_set = set()
        url_xml_set = set()
        ip_db_set = set()
        ip_xml_set = set()
        sub_ip_xml_set = set()
        sub_ip_db_set = set()
        domain_db_set = set()
        domain_xml_set = set()
        data_update = False
        with self.transact.atomic():
            for item_xml in list_xml:
                for data_xml in item_xml.iter():
                    # print(data_xml.tag, data_xml.attrib, data_xml.text)
                    if data_xml.tag == 'content':
                        content_id = int(data_xml.attrib['id'])
                        hash_value = data_xml.attrib['hash']
                        item_db = Item.get(Item.content_id == content_id,
                                           Item.purge >> None)

                        if hash_value != item_db.hashRecord:
                            logger.info('Hashes not equal, update hash id: %s',
                                        content_id)
                            try:
                                urgency_type = int(
                                    data_xml.attrib['urgencyType'])
                            except KeyError:
                                urgency_type = 0
                            include_time = self.date_time_xml_to_db(
                                data_xml.attrib['includeTime'])
                            try:
                                block_type = data_xml.attrib['blockType']
                            except KeyError:
                                block_type = 'default'
                            entry_type = int(data_xml.attrib['entryType'])
                            item_db.hashRecord = hash_value
                            # Item.update(purge=None).where(Item.content_id == content_id).execute()
                            data_update = True
                        else:
                            data_update = False
                            break

                    if data_xml.tag == 'decision':
                        decision_date = data_xml.attrib['date']
                        decision_number = data_xml.attrib['number']
                        decision_org = data_xml.attrib['org']
                        # print(item_db)
                        if str(item_db.includeTime) != include_time:
                            logger.info('content_id: %s.', content_id)
                            logger.info('XML includeTime: %s.', include_time)
                            logger.info('DB includeTime: %s.',
                                        item_db.includeTime)
                            item_db.includeTime = include_time
                            # Item.update(includeTime=include_time).where(Item.content_id == content_id,
                            #                                             Item.purge >> None).execute()
                        if item_db.urgencyType != urgency_type:
                            logger.info('content_id: %s.', content_id)
                            logger.info('XML urgencyType: %s.', urgency_type)
                            logger.info('DB urgencyType: %s.',
                                        item_db.urgencyType)
                            item_db.urgencyType = urgency_type
                            # Item.update(urgencyType=urgency_type).where(Item.content_id == content_id,
                            #                                             Item.purge >> None).execute()
                        if item_db.blockType != block_type:
                            logger.info('content_id: %s.', content_id)
                            logger.info('XML blockType: %s.', block_type)
                            logger.info('DB blockType: %s.', item_db.blockType)
                            item_db.blockType = block_type
                            # Item.update(blockType=block_type).where(Item.content_id == content_id,
                            #                                         Item.purge >> None).execute()
                        if item_db.entryType != entry_type:
                            logger.info('content_id: %s.', content_id)
                            logger.info('XML entryType: %s.', entry_type)
                            logger.info('DB entryType: %s.', item_db.entryType)
                            item_db.entryType = entry_type
                            # Item.update(entryType=entry_type).where(Item.content_id == content_id,
                            #                                         Item.purge >> None).execute()
                        if str(item_db.decision_date) != decision_date:
                            logger.info('content_id: %s.', content_id)
                            logger.info('XML date: %s.', decision_date)
                            logger.info('DB date: %s.',
                                        str(item_db.decision_date))
                            item_db.decision_date = decision_date
                            # Item.update(decision_date=decision_date).where(Item.content_id == content_id,
                            #                                                Item.purge >> None).execute()
                        if item_db.decision_num != decision_number:
                            logger.info('content_id: %s.', content_id)
                            logger.info('XML number: %s.', decision_number)
                            logger.info('DB number: %s.', item_db.decision_num)
                            item_db.decision_num = decision_number
                            # Item.update(decision_num=decision_number).where(Item.content_id == content_id,
                            #                                                 Item.purge >> None).execute()
                        if item_db.decision_org != decision_org:
                            logger.info('content_id: %s.', content_id)
                            logger.info('XML org: %s.', decision_org)
                            logger.info('DB org: %s.', item_db.decision_org)
                            item_db.decision_org = decision_org
                            # Item.update(decision_org=decision_org).where(Item.content_id == content_id,
                            #                                              Item.purge >> None).execute()

                    if data_xml.tag == 'url':
                        if not self.only_ascii(data_xml.text):
                            url_split = str(data_xml.text).split(':')
                            url = url_split[0] + ':' + urllib.parse.quote(
                                url_split[1])
                        else:
                            url = data_xml.text
                        url_xml_set.add(url)

                    if data_xml.tag == 'domain':
                        if not self.only_ascii(data_xml.text):
                            domain = (str(
                                data_xml.text).encode('idna')).decode()
                        else:
                            domain = data_xml.text
                        domain_xml_set.add(domain)

                    if data_xml.tag == 'ip':
                        ip_xml_set.add(data_xml.text)

                    if data_xml.tag == 'ipSubnet':
                        sub_ip_xml_set.add(data_xml.text)

                if data_update:
                    url_db = URL.select().where(URL.item == item_db.id,
                                                URL.purge >> None)

                    for url_item in url_db:
                        url_db_set.add(url_item.url)
                    if url_db_set != url_xml_set:
                        common_url_set = url_xml_set.intersection(url_db_set)
                        delete_url_set = url_db_set.difference(common_url_set)
                        add_url_set = url_xml_set.difference(common_url_set)
                        if len(delete_url_set) > 0:
                            logger.info('Delete id %s URL: %s', content_id,
                                        delete_url_set)
                            for delete_url in delete_url_set:
                                URL.update(purge=self.code_id).where(
                                    URL.item == item_db.id,
                                    URL.url == delete_url,
                                    URL.purge >> None).execute()
                        if len(add_url_set) > 0:
                            logger.info('Add id %s URL: %s', content_id,
                                        add_url_set)
                            for add_url in add_url_set:
                                URL.create(item=item_db.id,
                                           content_id=item_db.content_id,
                                           url=add_url,
                                           add=self.code_id)
                    url_db_set.clear()
                    url_xml_set.clear()

                    domain_db = Domain.select().where(
                        Domain.item == item_db.id, Domain.purge >> None)

                    for domain_item in domain_db:
                        domain_db_set.add(domain_item.domain)
                    if domain_db_set != domain_xml_set:
                        common_domain_set = domain_xml_set.intersection(
                            domain_db_set)
                        delete_domain_set = domain_db_set.difference(
                            common_domain_set)
                        add_domain_set = domain_xml_set.difference(
                            common_domain_set)
                        if len(delete_domain_set) > 0:
                            logger.info('Delete id %s Domain: %s', content_id,
                                        delete_domain_set)
                            for delete_domain in delete_domain_set:
                                Domain.update(purge=self.code_id).where(
                                    Domain.item == item_db.id,
                                    Domain.domain == delete_domain,
                                    Domain.purge >> None).execute()
                        if len(add_domain_set) > 0:
                            logger.info('Add id %s Domain: %s', content_id,
                                        add_domain_set)
                            for add_domain in add_domain_set:
                                Domain.create(item=item_db.id,
                                              content_id=item_db.content_id,
                                              domain=add_domain,
                                              add=self.code_id)
                    domain_db_set.clear()
                    domain_xml_set.clear()

                    ip_db = IP.select().where(IP.item == item_db.id,
                                              IP.mask == 32, IP.purge >> None)

                    for ip_item in ip_db:
                        ip_db_set.add(ip_item.ip)
                    if ip_db_set != ip_xml_set:
                        common_ip_set = ip_xml_set.intersection(ip_db_set)
                        delete_ip_set = ip_db_set.difference(common_ip_set)
                        add_ip_set = ip_xml_set.difference(common_ip_set)
                        if len(delete_ip_set) > 0:
                            logger.info('Delete id %s ip: %s', content_id,
                                        delete_ip_set)
                            for delete_ip in delete_ip_set:
                                IP.update(purge=self.code_id).where(
                                    IP.item == item_db.id, IP.ip == delete_ip,
                                    IP.mask == 32, IP.purge >> None).execute()
                        if len(add_ip_set) > 0:
                            logger.info('Add id %s ip: %s', content_id,
                                        add_ip_set)
                            for add_ip in add_ip_set:
                                IP.create(item=item_db.id,
                                          content_id=item_db.content_id,
                                          ip=add_ip,
                                          add=self.code_id)
                    ip_db_set.clear()
                    ip_xml_set.clear()

                    sub_ip_db = IP.select().where(IP.item == item_db.id,
                                                  IP.mask < 32,
                                                  IP.purge >> None)

                    for sub_ip_item in sub_ip_db:
                        sub_ip_db_set.add(
                            str(sub_ip_item.ip) + '/' + str(sub_ip_item.mask))
                    if sub_ip_db_set != sub_ip_xml_set:
                        common_sub_ip_set = sub_ip_xml_set.intersection(
                            sub_ip_db_set)
                        delete_sub_ip_set = sub_ip_db_set.difference(
                            common_sub_ip_set)
                        add_sub_ip_set = sub_ip_xml_set.difference(
                            common_sub_ip_set)
                        if len(delete_sub_ip_set) > 0:
                            logger.info('Delete id %s subnet: %s', content_id,
                                        delete_sub_ip_set)
                            for delete_sub_ip in delete_sub_ip_set:
                                # Split 'a.b.c.d/nn' back into address and mask.
                                del_ip, del_mask = delete_sub_ip.split('/')
                                IP.update(purge=self.code_id).where(
                                    IP.item == item_db.id, IP.ip == del_ip,
                                    IP.mask == int(del_mask),
                                    IP.purge >> None).execute()
                        if len(add_sub_ip_set) > 0:
                            logger.info('Add id %s subnet: %s', content_id,
                                        add_sub_ip_set)
                            for add_sub_ip in add_sub_ip_set:
                                add_ip, add_mask = add_sub_ip.split('/')
                                IP.create(item=item_db.id,
                                          content_id=item_db.content_id,
                                          ip=add_ip,
                                          mask=int(add_mask),
                                          add=self.code_id)
                    item_db.save()
                    sub_ip_db_set.clear()
                    sub_ip_xml_set.clear()

        if self.check_diff():
            self.cleaner()
            return 1
        else:
            logger.info('No updates.')
            return 2
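
The update pass above repeats one pattern for every record type (URLs, domains, plain IPs, subnets): build a set from the live database rows, build a set from dump.xml, and use the intersection to split the difference into a purge list and an insert list. A minimal standalone sketch of that set arithmetic (function name and sample values are illustrative, not from the project):

def split_diff(db_set, xml_set):
    """Return (to_delete, to_add) for one record type.

    Rows present only in the database are stale and get purged;
    rows present only in the fresh dump get inserted.
    """
    common = xml_set & db_set
    return db_set - common, xml_set - common


# The dump dropped 10.0.0.1 and introduced 10.0.0.3:
to_delete, to_add = split_diff({'10.0.0.1', '10.0.0.2'},
                               {'10.0.0.2', '10.0.0.3'})
print(to_delete, to_add)  # {'10.0.0.1'} {'10.0.0.3'}
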
Example #13
__author__ = 'ipetrash'

import json

import sys
sys.path.append('..')

from db import Dump

import peewee
from playhouse.shortcuts import dict_to_model

from export import FILE_NAME_EXPORT_JSON

with open(FILE_NAME_EXPORT_JSON, encoding='utf-8') as f:
    items = json.load(f)
print('items:', len(items))
print('Dump count before import:', Dump.select().count())

for x in items:
    try:
        dump = dict_to_model(Dump, x)
        dump.save(force_insert=True)
        print(f'Import {x}')

    except peewee.IntegrityError:
        # Ignore "UNIQUE constraint failed: dump.id": the row is already imported.
        pass

print('Current dump count:', Dump.select().count())
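
For reference, dict_to_model is the inverse of the model_to_dict helper used for the export in Example #16 below, and save(force_insert=True) issues an INSERT even though the primary key is already populated (a plain save() on such an instance would attempt an UPDATE). A hedged round-trip sketch against a throwaway in-memory model (the schema here is invented for illustration):

import peewee
from playhouse.shortcuts import model_to_dict, dict_to_model

db = peewee.SqliteDatabase(':memory:')


class Dump(peewee.Model):  # illustrative schema, not the project's real model
    name = peewee.TextField()

    class Meta:
        database = db


db.create_tables([Dump])
row = Dump.create(name='example')

data = model_to_dict(row)            # {'id': 1, 'name': 'example'}
row.delete_instance()                # pretend we import into an empty table

restored = dict_to_model(Dump, data)
restored.save(force_insert=True)     # INSERT that keeps the original primary key
assert Dump.get_by_id(1).name == 'example'
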
Example #14
    def statistics_show(self, diff=0, stdout=False):

        date_time = datetime.fromtimestamp(int(Dump.get(Dump.param == 'lastDumpDate')
                                               .value)).strftime('%Y-%m-%d %H:%M:%S')

        message = 'vigruzki.rkn.gov.ru update: ' + date_time + '\n'

        url_add_sql = self._url_diff_sql(diff, 'ignore', 1)
        message += '\nURLs added: \n\n'
        for url_add in url_add_sql:
            message += url_add.url + '\n'

        ip_add_sql = self._ip_diff_sql(diff, 'ignore', 1)
        message += '\nIPs added: \n\n'
        for ip_add in ip_add_sql:
            if ip_add.mask < 32:
                message += ip_add.ip + '/' + str(ip_add.mask) + '\n'
            else:
                message += ip_add.ip + '\n'

        domain_add_sql = self._domain_diff_sql(diff, 'ignore', 1)
        message += '\nDOMAINs added: \n\n'
        for domain_add in domain_add_sql:
            message += domain_add.domain + '\n'

        url_del_sql = self._url_diff_sql(diff, 'ignore', 0)
        message += '\nURLs deleted: \n\n'
        for url_del in url_del_sql:
            message += url_del.url + '\n'

        ip_del_sql = self._ip_diff_sql(diff, 'ignore', 0)
        message += '\nIPs deleted: \n\n'
        for ip_del in ip_del_sql:
            if ip_del.mask < 32:
                message += ip_del.ip + '/' + str(ip_del.mask) + '\n'
            else:
                message += ip_del.ip + '\n'

        domain_del_sql = self._domain_diff_sql(diff, 'ignore', 0)
        message += '\nDOMAINs deleted: \n\n'
        for domain_del in domain_del_sql:
            message += domain_del.domain + '\n'

        rb_list = self.idx_list[:diff]
        domain_count = Domain.select(fn.Count(fn.Distinct(Domain.domain)))\
            .where(~(Domain.add << rb_list) & ((Domain.purge >> None) | (Domain.purge << rb_list))).scalar()
        url_count = URL.select(fn.Count(fn.Distinct(URL.url)))\
            .where(~(URL.add << rb_list) & ((URL.purge >> None) | (URL.purge << rb_list))).scalar()
        ip_count = IP.select(fn.Count(fn.Distinct(IP.ip)))\
            .where(~(IP.add << rb_list) & ((IP.purge >> None) | (IP.purge << rb_list))).scalar()
        id_count = Item.select(fn.Count(fn.Distinct(Item.content_id)))\
            .where(~(Item.add << rb_list) & ((Item.purge >> None) | (Item.purge << rb_list))).scalar()

        message += '\nURLs count: ' + str(url_count) + '\n'
        message += 'IPs count: ' + str(ip_count) + '\n'
        message += 'DOMAINs count: ' + str(domain_count) + '\n'
        message += 'Item count: ' + str(id_count) + '\n'

        if stdout:
            print(message)
            return False
        else:
            return message
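
The count queries above lean on peewee's overloaded operators: field >> None compiles to IS NULL, field << a_list compiles to IN, and ~(...) negates. A self-contained sketch of the same query shape against a simplified stand-in model (schema and sample rows are invented for illustration):

from peewee import SqliteDatabase, Model, TextField, IntegerField, fn

db = SqliteDatabase(':memory:')


class URL(Model):  # simplified stand-in for the project's URL model
    url = TextField()
    add = IntegerField()             # id of the dump that added the row
    purge = IntegerField(null=True)  # id of the dump that purged it, or NULL

    class Meta:
        database = db


db.create_tables([URL])
URL.create(url='http://a', add=1)           # still active
URL.create(url='http://b', add=1, purge=2)  # purged by dump 2
URL.create(url='http://c', add=3)           # added by a rolled-back dump

rb_list = [2, 3]  # dump ids treated as rolled back, like rb_list above
count = (URL.select(fn.Count(fn.Distinct(URL.url)))
         .where(~(URL.add << rb_list) &    # << compiles to IN, ~ to NOT
                ((URL.purge >> None) |     # >> None compiles to IS NULL
                 (URL.purge << rb_list)))
         .scalar())
print(count)  # 2: 'http://a' and 'http://b' still count as present
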
Example #15
log.info('Start.')

if Path(FILE_NAME_GAMES).exists():
    backup_file_name = str(FILE_NAME_BACKUP /
                           (DT.datetime.today().strftime('%d%m%y-%H%M%S_') +
                            Path(FILE_NAME_GAMES).name))
    shutil.copy(FILE_NAME_GAMES, backup_file_name)
    log.info(f'Save backup to: {backup_file_name}')
    log.info('')

log.info('Loading cache...')

game_by_genres = load(FILE_NAME_GAMES)
log.info(f'game_by_genres ({len(game_by_genres)}): {game_by_genres}')

new_game_by_genres = Dump.dump()
log.info(
    f'new_game_by_genres ({len(new_game_by_genres)}): {new_game_by_genres}')

genre_translate = load()
log.info(f'genre_translate ({len(genre_translate)}): {genre_translate}')

log.info('Finish loading cache.')
log.info('')

log.info('Search games...')

number = 0

for game, genres in new_game_by_genres.items():
    if game in game_by_genres:
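
The snippet above is cut off inside the loop. Judging from the cache-loading code before it, the body most likely skips games that are already cached and records the new ones; a guess at the continuation (not the original code):

# Hypothetical continuation of the truncated loop above.
for game, genres in new_game_by_genres.items():
    if game in game_by_genres:
        continue  # already cached, nothing to do

    game_by_genres[game] = genres
    number += 1
    log.info(f'New game {game!r}: {genres}')

log.info(f'Finish. Total new games: {number}')
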
Example #16
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

__author__ = 'ipetrash'

import json
from pathlib import Path

import sys
sys.path.append('..')

from db import Dump

from playhouse.shortcuts import model_to_dict

DIR = Path(__file__).parent.resolve() / 'data'
DIR.mkdir(parents=True, exist_ok=True)

FILE_NAME_EXPORT_JSON = DIR / 'games.json'

if __name__ == '__main__':
    items = [model_to_dict(dump) for dump in Dump.select()]
    print(len(items))

    with open(FILE_NAME_EXPORT_JSON, 'w', encoding='utf-8') as f:
        json.dump(items, f, ensure_ascii=False, indent=4)
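
A quick sanity check for the export, reading the file straight back (this check is an addition, not part of the example):

with open(FILE_NAME_EXPORT_JSON, encoding='utf-8') as f:
    loaded = json.load(f)
assert len(loaded) == len(items)
print('Export verified:', len(loaded), 'records')
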
Example #17
def run_parser(parser, games: list, max_num_request=5):
    try:
        pauses = [
            ('15 minutes', 15 * 60),
            ('30 minutes', 30 * 60),
            ('45 minutes', 45 * 60),
            ('1 hour', 60 * 60),
        ]
        SITE_NAME = parser.get_site_name()
        timeout = 3                       # 3 seconds
        MAX_TIMEOUT = 10                  # 10 seconds
        TIMEOUT_EVERY_N_GAMES = 50        # Every 50 games
        TIMEOUT_BETWEEN_N_GAMES = 3 * 60  # 3 minutes
        number = 0

        for game_name in games:
            try:
                if Dump.exists(SITE_NAME, game_name):
                    continue

                number += 1

                num_request = 0

                while True:
                    num_request += 1
                    try:
                        if num_request == 1:
                            log.info(f'#{number}. Search genres for {game_name!r} ({SITE_NAME})')
                        else:
                            log.info(f'#{number}. Search genres for {game_name!r} ({SITE_NAME}). '
                                     f'Attempts {num_request}/{max_num_request}')

                        genres = parser.get_game_genres(game_name)
                        log.info(f'#{number}. Found genres {game_name!r} ({SITE_NAME}): {genres}')

                        Dump.add(SITE_NAME, game_name, genres)
                        counter.inc()

                        time.sleep(timeout)
                        break

                    except Exception:
                        log.exception(f'#{number}. Error on request {num_request}/{max_num_request} ({SITE_NAME})')
                        if num_request >= max_num_request:
                            log.info(f'#{number}. Attempts ended for {game_name!r} ({SITE_NAME})')
                            break

                        pause_text, pause_secs = pauses[num_request - 1]
                        log.info(f'#{number}. Pause: {pause_text}')
                        time.sleep(pause_secs)

                        timeout += 1
                        if timeout > MAX_TIMEOUT:
                            timeout = MAX_TIMEOUT

                if number % TIMEOUT_EVERY_N_GAMES == 0:
                    log.info(
                        f'#{number}. Pause for every {TIMEOUT_EVERY_N_GAMES} games: {TIMEOUT_BETWEEN_N_GAMES} secs'
                    )
                    time.sleep(TIMEOUT_BETWEEN_N_GAMES)

            except Exception:
                log.exception(f'#{number}. Error by game {game_name!r} ({SITE_NAME})')

    except Exception:
        log.exception('Error')
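
The retry loop in run_parser is a generic escalating-pause pattern: after each failure take the next, longer pause from a fixed ladder and give up once max_num_request attempts are spent. Extracted into a reusable helper under the same assumptions (names are illustrative):

import time
import logging

log = logging.getLogger(__name__)

PAUSES = [('15 minutes', 15 * 60), ('30 minutes', 30 * 60),
          ('45 minutes', 45 * 60), ('1 hour', 60 * 60)]


def retry_with_pauses(func, *args, max_attempts=5):
    """Call func(*args), sleeping an escalating pause after each failure."""
    for attempt in range(1, max_attempts + 1):
        try:
            return func(*args)
        except Exception:
            log.exception(f'Error on attempt {attempt}/{max_attempts}')
            if attempt >= max_attempts:
                raise  # attempts exhausted, let the caller decide
            pause_text, pause_secs = PAUSES[attempt - 1]
            log.info(f'Pause: {pause_text}')
            time.sleep(pause_secs)


# Usage sketch: genres = retry_with_pauses(parser.get_game_genres, game_name)
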