Пример #1
0
async def parse_feed(username, url, data_directory):
    """Fetch an RSS feed and store every image that is not in the database yet.

    Entries are processed in feed order. Processing of the whole feed stops
    at the first entry whose id is already stored, or at the first downloaded
    image whose SHA-256 hash is already stored.

    Args:
        username: Stored on every new row and used in log messages.
        url: Feed location passed to ``feedparser.parse``.
        data_directory: Directory the downloaded images are moved into.

    Returns:
        None. Every exception is logged and swallowed; the session is always
        closed.
    """
    session = config.Session()
    try:
        # NOTE(review): feedparser.parse is called without await, so it runs
        # synchronously inside this coroutine.
        feed = feedparser.parse(url)
        for entry in feed['entries']:
            published = datetime.strptime(entry['published'],
                                          '%a, %d %b %Y %H:%M:%S %z')
            thumbnails = entry['media_thumbnail']

            # The id check is done once per entry, BEFORE any of its images is
            # stored. Doing it per image made the second image of an entry
            # match the row just committed for the first one, which dropped
            # the remaining images and stopped the whole feed.
            already_stored = session.query(InstgaramImageRss).filter(
                InstgaramImageRss.rss_webstagram_id == entry['id']).first()
            if already_stored is not None:
                return

            for i, media in enumerate(thumbnails):
                logging.info('processing {} for {}'.format(i, username))

                # NOTE(review): these are private tempfile helpers; kept so
                # that Utils.download still receives a path that does not
                # exist yet.
                tmp_filename = os.path.join(
                    tempfile._get_default_tempdir(),
                    next(tempfile._get_candidate_names()))
                await Utils.download(media['url'], path=tmp_filename)

                # Context manager so the handle is closed right after hashing.
                with open(tmp_filename, 'rb') as downloaded:
                    image_hash = hashlib.sha256(downloaded.read()).hexdigest()

                same_hash = session.query(InstgaramImageRss).filter(
                    InstgaramImageRss.image_hash == image_hash).all()
                if same_hash:
                    os.remove(tmp_filename)
                    return

                filename = urlparse(
                    entry["link"]).path[3:] + "_" + str(i) + '.jpg'
                new_path = os.path.join(data_directory, filename)
                shutil.move(tmp_filename, new_path)

                image_row = InstgaramImageRss()
                image_row.published = published
                image_row.local_name = filename
                image_row.local_path = new_path
                image_row.rss_webstagram_id = entry['id']
                image_row.summary = entry['summary_detail']['value']
                image_row.media_url = media['url']
                image_row.image_hash = image_hash
                image_row.creation_time = datetime.now()
                image_row.link = entry['link']
                image_row.sended = False
                image_row.username = username
                session.add(image_row)
                try:
                    session.commit()
                except Exception as commit_error:
                    # Keep going with the next image, but leave a trace.
                    logging.exception(commit_error)
                    session.rollback()
    except Exception as e:
        logging.exception(str(e))
    finally:
        session.close()
Пример #2
0
def main():
    """Print every stored chat, then every stored subscription.

    The session is closed even when a query or a print raises.
    """
    session = config.Session()
    try:
        for chat in session.query(InstagramBotDAO.Chat).all():
            print(chat)

        print('''---------- subscriptions ------ ''')

        for subscription in session.query(
                InstagramBotDAO.InstagramSubscription).all():
            print(subscription)
    finally:
        session.close()
Пример #3
0
 async def run(self):
     """Call ``self.register()`` forever, sleeping between passes.

     Exceptions raised by ``register`` are logged and do not stop the loop.
     The pause between passes is ``config.TIME_SLEEP_REGISTER``.
     """
     logging.info('starting instaloader registering')
     while True:
         # No session is opened here: the previous one was never used by
         # this loop, it was only created and closed again on every pass.
         try:
             self.register()
         except Exception as e:
             logging.exception(e)
         logging.info('sleeping instaloader registering...')
         await asyncio.sleep(config.TIME_SLEEP_REGISTER)
         logging.info('waking up registering')
Пример #4
0
 async def _scrape(self, username):
     """Parse one user and record the time of the check on its subscription.

     Args:
         username: Passed to ``parse_user`` and used to look up the
             ``InstagramSubscription`` row.

     Every exception is logged and swallowed; the session is always closed.
     """
     session = config.Session()
     try:
         subscription = session.query(InstagramSubscription).filter(
             InstagramSubscription.username == username).first()
         await parse_user(username)
         if subscription is None:
             # .first() returns None when no row matches; without this guard
             # the attribute assignment below raised AttributeError.
             logging.warning(
                 'no subscription stored for {}, check time not updated'
                 .format(username))
             return
         subscription.last_check_datetime = datetime.now()
         session.add(subscription)
         try:
             session.commit()
         except Exception as e:
             logging.exception(e)
             session.rollback()
     except Exception as e:
         logging.exception(e)
     finally:
         session.close()
Пример #5
0
def scrape(scrapper):
    """Store new forum themes and refresh the ones that changed.

    For every section in ``config.sections`` the themes listed by
    ``scrapper.get_list_themes`` are compared with the stored rows:

    * no stored row: a screenshot is taken and a new row is added;
    * exactly one stored row with an older ``last_update``: a new screenshot
      is taken and the row is marked as updated and not yet sent;
    * more than one stored row: a warning is logged and the theme is skipped.

    Args:
        scrapper: Object providing ``get_list_themes(section_id=...)``,
            ``get_screenshot(url)`` and ``close()``.

    The scrapper and the session are closed even when an exception escapes.
    """
    session = config.Session()
    logging.info('starting scrapper')
    try:
        for section_id, section_name in config.sections.items():
            for theme in scrapper.get_list_themes(section_id=section_id):
                # Two separate criteria: filter() joins them with SQL AND.
                # Python's `and` between two column expressions evaluates to
                # just one of them, so one criterion was silently dropped.
                stored_themes = session.query(WatchRuDAO.WatchRuTheme).filter(
                    WatchRuDAO.WatchRuTheme.theme_id == theme['id'],
                    WatchRuDAO.WatchRuTheme.section_id == section_id).all()
                if not stored_themes:
                    logging.info('new theme, not stored in db -> adding')
                    ret_screens = scrapper.get_screenshot(theme['url'])
                    parsed_theme = WatchRuDAO.WatchRuTheme(
                        theme_id=theme['id'],
                        theme_name=theme['name'],
                        section_id=section_id,
                        section=section_name,
                        pdf_path=ret_screens['pdf'],
                        screenshot_path=ret_screens['jpeg'],
                        last_update=theme['update_time'],
                        sended=False,
                        was_updated=False,
                    )
                    session.add(parsed_theme)
                elif len(stored_themes) == 1:
                    stored_theme = stored_themes[0]
                    logging.info('old theme, i need check time update')
                    if theme['update_time'] <= stored_theme.last_update:
                        continue
                    ret_screens = scrapper.get_screenshot(theme['url'])
                    stored_theme.last_update = theme['update_time']
                    stored_theme.was_updated = True
                    stored_theme.sended = False
                    stored_theme.pdf_path = ret_screens['pdf']
                    stored_theme.screenshot_path = ret_screens['jpeg']
                else:
                    logging.warning(
                        'wtf too many themes with same id and section')
                    continue
                try:
                    session.commit()
                except Exception as commit_error:
                    # Log instead of swallowing, then carry on with the
                    # next theme.
                    logging.exception(commit_error)
                    session.rollback()
    finally:
        try:
            scrapper.close()
        finally:
            session.close()
    logging.info('finished')
Пример #6
0
 async def run(self):
     """Scrape every active subscription forever, sleeping between passes.

     On each pass all ``InstagramSubscription`` rows with ``subscribed`` set
     are loaded and ``self._scrape`` is awaited for each username. Exceptions
     are logged and do not stop the loop. The pause between passes is
     ``config.TIME_SLEEP``.
     """
     logging.info('starting subscription')
     while True:
         session = config.Session()
         try:
             logging.info('starting data')
             # `== True` is intentional: it builds the SQL comparison.
             # .all() always returns a list, so no None check is needed.
             subscriptions = session.query(InstagramSubscription).filter(
                 InstagramSubscription.subscribed == True).all()  # noqa: E712
             for subscription in subscriptions:
                 await self._scrape(subscription.username)
         except Exception as e:
             logging.exception(e)
         finally:
             session.close()
         logging.info('sleeping')
         await asyncio.sleep(config.TIME_SLEEP)
Пример #7
0
 async def run(self):
     """Run the instaloader scrape for every active subscription forever.

     On each pass all ``InstagramSubscription`` rows with ``subscribed`` set
     are loaded and ``self.scrape`` is awaited for each username. Exceptions
     are logged and do not stop the loop. The pause between passes is
     ``config.TIME_SLEEP_INSTALOADER_PARSER``.
     """
     logging.info('starting instaloader')
     while True:
         session = config.Session()
         try:
             logging.info('starting data')
             # `== True` is intentional: it builds the SQL comparison.
             # .all() always returns a list, so no None check is needed.
             subscriptions = session.query(InstagramSubscription).filter(
                 InstagramSubscription.subscribed == True).all()  # noqa: E712
             for subscription in subscriptions:
                 logging.info('username===={}'.format(subscription))
                 await self.scrape(subscription.username)
         except Exception as e:
             logging.exception(e)
         finally:
             session.close()
         logging.info('sleeping...')
         await asyncio.sleep(config.TIME_SLEEP_INSTALOADER_PARSER)
         logging.info('waking up')
Пример #8
0
 def register(self):
     """Record media files found under ``self.folder_for_registering``.

     Each sub-directory name is used as the username. Every ``jpg``/``mp4``
     file whose name matches ``<date>_UTC[_<index>|_<type>].<ext>`` and that
     has no matching ``InstagramImageNoRss`` row yet gets a new row. The
     contents of the sibling ``_UTC.txt``, ``_UTC.json``,
     ``_UTC_comments.json`` and ``_UTC_location.txt`` files are copied into
     the row when those files exist.

     Every exception is logged and swallowed; the session is always closed.
     """

     def read_joined(path, separator):
         # Return the lines of a UTF-8 text file joined with `separator`.
         with open(path, 'r', encoding='utf8') as handle:
             return separator.join(handle.readlines())

     # Raw string: the pattern contains \d, \w and \. which are invalid
     # escape sequences in a normal string literal.
     filename_pattern = r'(?P<date>.*)_UTC(_((?P<index>\d+)|(?P<type>[\w|_]+)))?\.(?P<extension>jpg|json|txt|mp4)'

     session = config.Session()
     try:
         for username in os.listdir(self.folder_for_registering):
             current_directory = os.path.join(self.folder_for_registering,
                                              username)
             if not os.path.isdir(current_directory):
                 # A stray file here would make os.listdir raise and abort
                 # registration for every remaining user.
                 logging.warning(
                     'ignored non-directory = {}'.format(current_directory))
                 continue
             for file in os.listdir(current_directory):
                 current_filepath = os.path.join(current_directory, file)
                 parsed_filename = re.search(filename_pattern, file)
                 if not parsed_filename:
                     logging.warning(
                         'ignored file = {}'.format(current_filepath))
                     continue
                 # Only media files get a row; txt/json are side-car data.
                 if parsed_filename.group('extension') not in ('jpg', 'mp4'):
                     continue
                 current_filedate = parsed_filename.group('date')
                 current_filedatedate = datetime.datetime.strptime(
                     current_filedate, "%Y-%m-%d_%H-%M-%S")
                 # Files without a numeric suffix count as index 1.
                 index = parsed_filename.group('index') or 1
                 already_registered = session.query(InstagramImageNoRss).\
                     filter(InstagramImageNoRss.publication_index == index).\
                     filter(InstagramImageNoRss.username == username).\
                     filter(InstagramImageNoRss.local_path == current_filepath).\
                     filter(InstagramImageNoRss.published == current_filedatedate).all()
                 if already_registered:
                     continue

                 current_db_object = InstagramImageNoRss()
                 current_db_object.username = username
                 current_db_object.local_path = current_filepath
                 current_db_object.published = current_filedatedate

                 # NOTE(review): readlines() keeps each line's trailing
                 # newline, so joining with '\n' doubles the line breaks in
                 # the stored text. Left unchanged to keep stored data
                 # consistent with existing rows.
                 local_path_txt = os.path.join(
                     current_directory, current_filedate + '_UTC.txt')
                 if os.path.exists(local_path_txt):
                     current_db_object.local_path_txt = local_path_txt
                     current_db_object.text_data = read_joined(
                         local_path_txt, '\n')

                 local_path_json = os.path.join(
                     current_directory, current_filedate + '_UTC.json')
                 if os.path.exists(local_path_json):
                     current_db_object.local_path_json = local_path_json
                     # Round-trip through json to store a normalised dump.
                     current_db_object.json_data = json.dumps(
                         json.loads(read_joined(local_path_json, ' ')))

                 comments_path = os.path.join(
                     current_directory,
                     current_filedate + '_UTC_comments.json')
                 if os.path.exists(comments_path):
                     current_db_object.comments_path = comments_path
                     current_db_object.comments_data = json.dumps(
                         json.loads(read_joined(comments_path, ' ')))

                 location_path = os.path.join(
                     current_directory,
                     current_filedate + '_UTC_location.txt')
                 if os.path.exists(location_path):
                     current_db_object.geolocation_path = location_path
                     current_db_object.geolocation_data = read_joined(
                         location_path, '\n')

                 current_db_object.sended = False
                 current_db_object.publication_index = index
                 session.add(current_db_object)
                 try:
                     session.commit()
                 except Exception as e:
                     logging.exception(e)
                     session.rollback()
     except Exception as e:
         logging.exception(e)
     finally:
         session.close()
    async def on_chat_message(msg):
        """Handle one incoming chat message and dispatch on its command.

        Non-text messages are ignored. Recognised commands:

        * ``/start``: store the chat if it is new and greet the user;
        * ``/client``: accepted, currently does nothing;
        * ``/admin <password>``: mark the chat as admin when the password
          equals ``config.BOT_ADMIN_PASSWORD``;
        * ``/help``: send the help text;
        * ``/regex_add <regex>``: store a regex for this chat;
        * ``/regex_remove_<id>``: delete the regex with that id;
        * any other ``/regex...``: list the regexes of this chat.

        Args:
            msg: The message dict; ``telepot.glance`` and ``msg['text']``
                are read from it.

        Any exception is logged and reported to all admin chats; the session
        is always closed.
        """
        session = config.Session()
        try:
            content_type, chat_type, chat_id = telepot.glance(msg)

            if content_type != 'text':
                return

            text = msg['text']
            # Commands are matched case-insensitively; arguments are parsed
            # from the original text.
            command = text.lower()
            if text == '/start':
                new_chat_info = session.query(Chat).filter(
                    Chat.chat_id == chat_id).first()
                if new_chat_info is None:
                    new_chat_info = Chat(chat_id=chat_id,
                                         admin=False,
                                         tg_ans=str(msg))
                    await bot.sendMessage(chat_id,
                                          u'Шо?! Новый пользователь?!')
                else:
                    await bot.sendMessage(chat_id, u'А я тебя уже знаю')
                session.add(new_chat_info)
                try:
                    session.commit()
                except Exception as e:
                    logging.exception(e)
                    session.rollback()
            elif command.startswith('/client'):
                # Placeholder: the previous code only ran an unused regex
                # search here, so the command still produces no reply.
                pass
            elif command.startswith('/admin'):
                passw = re.search(r'\/admin\s+(?P<passw>\w+)', text)
                if passw and passw.group('passw') == config.BOT_ADMIN_PASSWORD:
                    current_chat_info = session.query(Chat).filter(
                        Chat.chat_id == chat_id).first()
                    if current_chat_info is None:
                        # The chat never sent /start; create the row instead
                        # of failing on None.
                        current_chat_info = Chat(chat_id=chat_id,
                                                 admin=False,
                                                 tg_ans=str(msg))
                    current_chat_info.admin = True
                    session.add(current_chat_info)
                    try:
                        session.commit()
                    except Exception as e:
                        logging.exception(e)
                        session.rollback()
                    await bot.sendMessage(chat_id,
                                          u'Слушаю и повинуюсь, хозяин')
                    await _send_help(chat_id)
                else:
                    await bot.sendMessage(
                        chat_id, u'ЭЭЭ ТЫ КТО ТАКОЙ? ДАВАЙ ДО СВИДАНИЯ!')
            elif command == '/help':
                await _send_help(chat_id)
            elif command.startswith('/regex'):
                # Regex commands are only available to chats that are stored.
                known_chat = session.query(Chat).filter(
                    Chat.chat_id == chat_id).first()
                if known_chat is None:
                    return
                if command.startswith('/regex_add'):
                    add_match = re.search(
                        r'\/regex_add\s+(?P<regex_text>.*)', text)
                    if add_match and add_match.group('regex_text'):
                        new_regex = add_match.group('regex_text').strip()
                        session.add(Regexes(chat_id=chat_id, regex=new_regex))
                        try:
                            session.commit()
                        except Exception as e:
                            logging.exception(e)
                            session.rollback()
                        # Report the regex text; the Match object itself was
                        # being formatted into the reply before.
                        await bot.sendMessage(
                            chat_id, u'Added regex {}'.format(new_regex))
                    else:
                        await bot.sendMessage(chat_id, u'Not found regex')
                elif command.startswith('/regex_remove'):
                    # NOTE(review): the lookup is by id only, so a chat can
                    # remove a regex that belongs to another chat.
                    uid_remove = re.search(r'\/regex_remove_(?P<uid>\w+)',
                                           text)
                    regex_obj = None
                    if uid_remove and uid_remove.group('uid'):
                        regex_obj = session.query(Regexes).filter(
                            Regexes.id == uid_remove.group('uid')).first()
                    if regex_obj is None:
                        # Covers a missing uid and a uid with no stored row.
                        await bot.sendMessage(
                            chat_id, u'Error on removing regex not found uid')
                    else:
                        # Read the text before the row is deleted.
                        removed_regex = regex_obj.regex
                        session.delete(regex_obj)
                        try:
                            session.commit()
                            await bot.sendMessage(
                                chat_id,
                                u'Removed regex <b>{}</b>'.format(
                                    removed_regex),
                                parse_mode='html')
                        except Exception as e:
                            logging.exception(e)
                            session.rollback()
                            await bot.sendMessage(
                                chat_id,
                                u'Error on removing regex <b>{}</b> with exception {}'
                                .format(removed_regex, str(e)),
                                parse_mode='html')
                else:
                    regexes = session.query(Regexes).filter(
                        Regexes.chat_id == chat_id).all()
                    for regex in regexes:
                        await bot.sendMessage(
                            chat_id,
                            u"Регулярное выражение <b>{0}</b>. Удалить /regex_remove_{1}"
                            .format(regex.regex, regex.id),
                            parse_mode='html')
            else:
                await bot.sendMessage(chat_id, 'Неизвестная команда.')
        except Exception as e:
            logging.exception(e)
            error_report = 'Exception {} with {}'.format(
                str(e), traceback.format_exc())
            try:
                # The failure may have left the session in a failed
                # transaction; roll back before querying again.
                session.rollback()
                admin_chat_ids = [
                    x.chat_id for x in session.query(Chat).filter(
                        Chat.admin == True).all()  # noqa: E712
                ]
                for admin_chat_id in admin_chat_ids:
                    await bot.sendMessage(chat_id=admin_chat_id,
                                          text=error_report)
            except Exception as notify_error:
                logging.exception(notify_error)
        finally:
            session.close()
 async def send_themes():
     """Send every unsent forum theme to the admin chats whose regex matches.

     Runs forever. On each pass all ``WatchRuTheme`` rows with ``sended``
     unset are loaded. For each theme, every admin chat that has at least one
     stored regex matching the theme name receives the message and the
     screenshot. The theme is then marked as sent. The pause between passes
     is ``config.TIME_SLEEP_SENDER``.

     Exceptions are logged and reported to all admin chats; a failure while
     reporting is logged too, so the loop keeps running.
     """
     while True:
         logging.info('sending themes to admins')
         session = config.Session()
         try:
             themes_to_send = session.query(WatchRuTheme).filter(
                 WatchRuTheme.sended == False).all()  # noqa: E712
             for theme_to_send in themes_to_send:
                 if theme_to_send.was_updated:
                     template = '''В секции форума <b>{}</b> обновилась тема:\n <i>{}</i>\n __Обновлена__: {}\nurl: http://forum.watch.ru/showthread.php?t={}'''
                 else:
                     template = '''В секции форума <b>{}</b> добавлена тема:\n <i>{}</i>\n __Добавлена__: {}\nurl: http://forum.watch.ru/showthread.php?t={}'''
                 message = template.format(
                     theme_to_send.section.replace('"', ''),
                     theme_to_send.theme_name.replace(
                         'http://forum.watch.ru/images/market-question.png',
                         ''),
                     theme_to_send.last_update,
                     theme_to_send.theme_id)
                 admin_chat_ids = [
                     x.chat_id for x in session.query(Chat).filter(
                         Chat.admin == True).all()  # noqa: E712
                 ]
                 for chat_id in admin_chat_ids:
                     regexes = session.query(Regexes).filter(
                         Regexes.chat_id == chat_id).all()
                     matched = False
                     for stored in regexes:
                         try:
                             if re.search(stored.regex,
                                          theme_to_send.theme_name):
                                 matched = True
                                 break
                         except re.error as regex_error:
                             # Regexes are user-supplied. An invalid one used
                             # to abort the pass before the theme was marked
                             # as sent, so it was retried on every cycle.
                             logging.warning(
                                 'skipping invalid regex {!r} for chat {}: {}'
                                 .format(stored.regex, chat_id, regex_error))
                     if not matched:
                         continue
                     await bot.sendMessage(chat_id=chat_id,
                                           text=message,
                                           parse_mode='html')
                     try:
                         with open(theme_to_send.screenshot_path,
                                   'rb') as cf:
                             await bot.sendDocument(
                                 chat_id=chat_id,
                                 document=cf,
                                 caption=theme_to_send.theme_name[:50])
                     except Exception as e:
                         # A missing or unsendable screenshot must not block
                         # the text notification already sent.
                         logging.exception(e)
                 theme_to_send.sended = True
                 theme_to_send.was_updated = False
                 session.add(theme_to_send)
                 try:
                     session.commit()
                 except Exception as e:
                     logging.exception(e)
                     session.rollback()
         except Exception as e:
             logging.exception(e)
             error_report = 'Exception {} with {}'.format(
                 str(e), traceback.format_exc())
             try:
                 # Roll back first: the session may be in a failed
                 # transaction. Guard the whole report so that a failure
                 # here cannot end the loop.
                 session.rollback()
                 admin_chat_ids = [
                     x.chat_id for x in session.query(Chat).filter(
                         Chat.admin == True).all()  # noqa: E712
                 ]
                 for admin_chat_id in admin_chat_ids:
                     await bot.sendMessage(chat_id=admin_chat_id,
                                           text=error_report)
             except Exception as notify_error:
                 logging.exception(notify_error)
         finally:
             session.close()
         await asyncio.sleep(config.TIME_SLEEP_SENDER)