def add_group(self, bot, update, args):
    """
    Subscribes a group to a feed url.

    Expects exactly two arguments: the feed url and the group name, which
    has to start with '@'. Every rejected request is answered with a reply
    to the sender and ends the command without touching the database.
    """
    if len(args) != 2:
        update.message.reply_text(
            "Ja, daar snap ik dus helemaal niks van. Probeer dit eens:\n"
            "/addgroup <url> <groupame>")
        return

    feed_address = FeedHandler.format_url_string(string=args[0])
    group_name = args[1]

    # The url has to pass the feed check before anything else is looked at
    if not FeedHandler.is_parsable(url=feed_address):
        update.message.reply_text("Die url lijkt niet helemaal lekker!")
        return

    if not group_name.startswith('@'):
        update.message.reply_text("Een groepnaam moet met @ starten")
        return

    # Stored rows are compared by position: [0] is the group, [1] the url
    already_known = False
    for known in self.db.get_channels():
        if known[0] == group_name and known[1] == feed_address:
            already_known = True
            break

    if already_known:
        update.message.reply_text("Deze url is al aanwezig voor deze groep!")
        return

    # Store the url first, then link the group to it
    self.db.add_url(feed_address)
    self.db.add_channel(group_name, feed_address)
    update.message.reply_text("Channel en url zijn toegevoegd!")
def test_is_parsable(self):
    """The lorem-rss feed url must be parsable, the google urls must not."""
    feed_address = "https://lorem-rss.herokuapp.com/feed"
    self.assertTrue(FeedHandler.is_parsable(feed_address))

    # Checked in the same order as before: with scheme first, then without
    for rejected_address in ("https://google.de", "www.google.de"):
        self.assertFalse(FeedHandler.is_parsable(rejected_address))
def feed_url(update, url, **chat_info):
    """
    Subscribes a chat to the feed behind ``url`` and reports the outcome.

    ``chat_info`` must contain ``chat_id`` and may contain ``chat_name``.
    The "not a feed" notice goes to the chat the update came from, the
    result notice goes to ``chat_info['chat_id']``. Relies on the
    module-level ``bot``, ``db`` and ``envia_texto``.
    """
    normalized_url = FeedHandler.format_url_string(string=url)
    origin_chat_id = update.chat.id

    # Refuse anything that does not pass the feed check
    if not FeedHandler.is_parsable(url=normalized_url):
        rejection = "Sorry! It seems like '" + \
            str(normalized_url) + "' doesn't provide an RSS news feed.. Have you tried another URL from that provider?"
        envia_texto(bot=bot, chat_id=origin_chat_id, text=rejection)
        return

    target_chat_id = chat_info['chat_id']
    target_chat_name = chat_info.get('chat_name')
    sender_id = update.from_user.id

    # The url is stored as it was passed in; only the notice shows the
    # normalized form
    stored = db.set_url_to_chat(chat_id=str(target_chat_id),
                                chat_name=str(target_chat_name),
                                url=url,
                                user_id=str(sender_id))

    if not stored:
        notice = "Sorry, " + update.from_user.first_name + \
            "! I already have that url with stored in your subscriptions."
    else:
        notice = "I successfully added " + normalized_url + " to your subscriptions!"

    envia_texto(bot=bot, chat_id=target_chat_id, text=notice)
def add(self, bot, update, args):
    """
    Adds a rss subscription to user

    Expects exactly two arguments: the url and the entry name. The url is
    used exactly as typed; it is only lowercased when it is compared with
    and stored in the user's bookmarks. The current content of the url is
    fetched and stored together with the bookmark.
    """
    telegram_user = update.message.from_user

    if len(args) != 2:
        message = "Sorry! I could not add the entry! Please use the the command passing the following arguments:\n\n /add <url> <entryname> \n\n Here is a short example: \n\n /add http://www.feedforall.com/sample.xml ExampleEntry"
        update.message.reply_text(message)
        return

    # The formatted url used to be computed and immediately overwritten by
    # the raw argument; the raw argument is what was effectively used.
    arg_url = args[0]
    arg_entry = args[1]

    # Check if entry does not exists
    entries = self.db.get_urls_for_user(telegram_id=telegram_user.id)
    print(entries)

    if any(arg_url.lower() in entry for entry in entries):
        message = "Sorry, " + telegram_user.first_name + \
            "! I already have that url with stored in your subscriptions."
        update.message.reply_text(message)
        return

    if any(arg_entry in entry for entry in entries):
        message = "Sorry! I already have an entry with name " + \
            arg_entry + " stored in your subscriptions.. Please choose another entry name or delete the entry using '/remove " + arg_entry + "'"
        update.message.reply_text(message)
        return

    # get current web page content
    try:
        url_content = FeedHandler.get_url_content(arg_url)
    except Exception:
        # Any fetch problem is reported to the user. KeyboardInterrupt and
        # SystemExit are deliberately not swallowed here.
        message = "Sorry, cannot connect to that url, check for typos."
        update.message.reply_text(message)
        return

    print("start")
    self.db.add_user_bookmark(
        telegram_id=telegram_user.id,
        url=arg_url.lower(),
        alias=arg_entry,
        url_content=url_content)

    message = "I successfully added " + arg_entry + " to your subscriptions!"
    update.message.reply_text(message)
    print("finished")
def test_format_url_string(self):
    """
    A url that already has a scheme must come back unchanged, a url
    without one must come back prefixed with "http://".
    """
    expectations = (
        ("https://lorem-rss.herokuapp.com/feed",
         "https://lorem-rss.herokuapp.com/feed"),
        ("www.lorem-rss.herokuapp.com/feed",
         "http://www.lorem-rss.herokuapp.com/feed"),
        ("lorem-rss.herokuapp.com/feed",
         "http://lorem-rss.herokuapp.com/feed"),
    )

    for given, expected in expectations:
        formatted = FeedHandler.format_url_string(given)
        self.assertEqual(formatted, expected)
def update_feed(self, url):
    """
    Compares the live content of a url with the stored content and tells
    every active subscriber when it changed.

    ``url`` is a row that is read by position: ``url[0]`` is the address
    and ``url[2]`` the content stored at the last update. ``user[6]`` is
    the is_active flag of a subscriber and ``user[0]`` the chat id.

    The page is fetched once per call. When the fetch fails, the active
    subscribers are asked to check the url and the stored content is kept,
    so a later successful fetch is still compared against the last known
    content. The last_updated timestamp is written in both cases.
    """
    telegram_users = self.db.get_users_for_url(url=url[0])
    content_from_db = url[2]

    # Fetch once, outside the user loop. The result used to be bound only
    # inside the loop, which left it undefined for the final update_url
    # call when no user was active or the first fetch failed.
    try:
        content_from_web = FeedHandler.get_url_content(url[0])
        fetch_failed = False
    except Exception:
        traceback.print_exc()
        content_from_web = content_from_db
        fetch_failed = True

    for user in telegram_users:
        if not user[6]:  # is_active
            continue

        if fetch_failed:
            message = "Something went wrong when I tried to parse the URL: \n\n " + \
                url[0] + "\n\nCould you please check that for me? Remove the url from your subscriptions using the /remove command, it seems like it does not work anymore!"
            self.bot.send_message(
                chat_id=user[0], text=message, parse_mode=ParseMode.HTML)
        elif content_from_web != content_from_db:
            message = "Check " + url[0] + " for new changes."
            self.bot.send_message(chat_id=user[0], text=message)

    # Same UTC wall-clock string as before, without the deprecated utcnow()
    timestamp = datetime.datetime.now(
        datetime.timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
    self.db.update_url(url=url[0], last_updated=str(timestamp),
                       url_content=content_from_web)
def add(self, bot, update, args):
    """
    Adds a rss subscription to user

    Takes the feed url and a short name for it. The request is refused
    with a reply when the argument count is wrong, the url does not pass
    the feed check, or the url or the name is already among the user's
    entries.
    """
    sender = update.message.from_user

    if len(args) != 2:
        update.message.reply_text(
            "Ja, daar snap dus ik dus niks van. Probeer dit eens:\n"
            " /add <url> <naampje>")
        return

    feed_address = FeedHandler.format_url_string(string=args[0])
    short_name = args[1]

    # The url must pass the feed check
    if not FeedHandler.is_parsable(url=feed_address):
        update.message.reply_text("Die url lijkt niet helemaal lekker!")
        return

    # Neither the url nor the name may be taken already
    known_entries = self.db.get_urls_for_user(telegram_id=sender.id)

    url_taken = any(feed_address.lower() in known for known in known_entries)
    if url_taken:
        update.message.reply_text("Deze url heb je al toegevoegd!")
        return

    name_taken = any(short_name in known for known in known_entries)
    if name_taken:
        update.message.reply_text(
            "Je hebt hetzelfde naampje gebruikt als een andere url, da ga nie")
        return

    self.db.add_user_bookmark(telegram_id=sender.id,
                              url=feed_address.lower(),
                              alias=short_name)
    update.message.reply_text(
        "Hij staat erbij! Gebruik /list als je me niet gelooft")
def add(self, bot, update, args):
    """
    Adds a rss subscription to user

    Expects exactly two arguments: the feed url and the entry name. When
    the url is not stored yet, the items currently in the feed are
    recorded with it, keyed by the CityHash64 of summary + title + link,
    before the bookmark is added for the user.
    """
    telegram_user = update.message.from_user

    if len(args) != 2:
        message = "Sorry! I could not add the entry! Please use the the command passing the following arguments:\n\n /add <url> <entryname> \n\n Here is a short example: \n\n /add http://www.feedforall.com/sample.xml ExampleEntry"
        update.message.reply_text(message)
        return

    arg_url = args[0]
    arg_entry = args[1]

    # Check if argument matches url format; the returned value is
    # iterated as the feed's items further down
    feed = FeedHandler.is_parsable(url=arg_url)
    if not feed:
        # The opening quote around the url was missing before
        message = f"Sorry! It seems like '{arg_url}' doesn't provide an RSS news feed... Have you tried another URL from that provider?"
        update.message.reply_text(message)
        return

    # Check if entry does not exists
    entries = self.db.get_urls_for_user(telegram_id=telegram_user.id)

    if any(arg_url.lower() in entry.lower() for entry, _ in entries):
        message = f"Sorry, {telegram_user.first_name}! I already have that url stored in your subscriptions."
        update.message.reply_text(message)
        return

    if any(arg_entry in entry for entry in entries):
        # The second half used to be a plain string, so the user was shown
        # the literal text "{arg_entry}" in the /remove hint
        message = (f"Sorry! I already have an entry with name {arg_entry}"
                   " stored in your subscriptions.. Please choose another entry name or delete the entry using"
                   f" '/remove {arg_entry}'")
        update.message.reply_text(message)
        return

    urls = self.db.get_all_urls()
    if arg_url not in urls:
        items = {}
        for item in feed:
            # Missing fields count as empty so that hashing cannot fail
            for key in ['summary', 'title', 'link']:
                if key not in item.keys():
                    item[key] = ''
            item_hash = CityHash64(
                item['summary'] + item['title'] + item['link'])
            # Keyed by the string form, which is what the feed updater
            # looks items up by
            items[str(item_hash)] = {
                'active': True,
                'last_date': DateHandler.get_datetime_now(),
            }
        self.db.add_url(url=arg_url, items=items)

    self.db.add_user_bookmark(telegram_id=telegram_user.id,
                              url=arg_url,
                              alias=arg_entry)

    message = f"I successfully added {arg_entry} to your subscriptions!"
    update.message.reply_text(message)
def update_feed(self, url):
    """
    Parses a feed, records which of its items are new and sends the new
    items to every active subscriber.

    ``url[0]`` is the feed address. Stored items are keyed by the string
    form of CityHash64(summary + title + link) and carry an ``active``
    flag and a ``last_date``. Items that are no longer in the feed are
    flagged inactive and dropped once their ``last_date`` is more than 5
    days old.

    When the feed cannot be parsed (or comes back empty), every active
    subscriber is asked to check the url and nothing is stored.
    """
    try:
        feed = FeedHandler.parse_feed(url[0])
    except Exception:
        feed = False
        traceback.print_exc()

    new_items = []
    if feed:
        print(f'{url[0]}:')
        print(f'Longitud de feed: {len(feed)}')

        url_items = self.db.get_url_items(url=url[0])
        for item in url_items:
            url_items[item]['active'] = False

        for item in feed:
            # Same defaulting as in the add command, so an item without
            # one of the fields hashes the same way instead of raising
            for key in ['summary', 'title', 'link']:
                if key not in item.keys():
                    item[key] = ''
            item_hash = str(
                CityHash64(item['summary'] + item['title'] + item['link']))

            if item_hash in url_items:
                # Still present in the feed: keep it active. Without this
                # every known item stayed inactive, was pruned after 5
                # days and then sent again as if it were new.
                url_items[item_hash]['active'] = True
            else:
                new_items.append(item)
                url_items[item_hash] = {
                    'active': True,
                    'last_date': DateHandler.get_datetime_now()
                }

        # Iterate over a copy because entries are removed while looping
        for item, value in url_items.copy().items():
            if value['active']:
                continue
            print(f'Desactivando {item}')
            if DateHandler.is_older_than_days(value['last_date'], 5):
                print(f'Borrando {item}')
                url_items.pop(item)

        self.db.update_url_items(url=url[0], items=url_items)

    telegram_users = self.db.get_users_for_url(url=url[0])
    for user in telegram_users:
        if not user[6]:  # is_active
            continue

        if not feed:
            message = "Something went wrong when I tried to parse the URL: \n\n " + \
                url[0] + "\n\nCould you please check that for me? Remove the url from your subscriptions using the /remove command, it seems like it does not work anymore!"
            self.bot.send_message(chat_id=user[0],
                                  text=message,
                                  parse_mode=ParseMode.HTML)
            # Move on to the next subscriber; returning here told only the
            # first active subscriber about the broken feed
            continue

        for post in new_items:
            self.send_message(post=post, user=user)
def get(self, bot, update, args):
    """
    Manually parses an rss feed

    Usage: /get <entryname> [<count 1-10>]. Without a count the newest 4
    entries are requested. A missing entry name, too many arguments, or a
    count that is not a number between 1 and 10 is answered with a usage
    hint instead of raising.
    """
    telegram_user = update.message.from_user

    # An empty argument list used to fall through to args[0] and raise
    # IndexError
    if not 1 <= len(args) <= 2:
        message = "To get the last news of your subscription please use /get <entryname> [optional: <count 1-10>]. Make sure you first add a feed using the /add command."
        update.message.reply_text(message)
        return

    args_entry = args[0]
    args_count = 4
    if len(args) == 2:
        try:
            args_count = int(args[1])
        except ValueError:
            # Not a number: report it like an out-of-range count
            args_count = None

    if args_count is None or not (1 <= args_count <= 10):
        message = "Count parameter (if used) must be between 1 and 10.\n" + \
            "Usage: /get <entryname> [optional: <count 1-10>]"
        update.message.reply_text(message)
        return

    url = self.db.get_user_bookmark(telegram_id=telegram_user.id,
                                    alias=args_entry)
    if url is None:
        message = "I can not find an entry with label " + \
            args_entry + " in your subscriptions! Please check your subscriptions using /list and use the delete command again!"
        update.message.reply_text(message)
        return

    entries = FeedHandler.parse_feed(url[0], args_count)
    for entry in entries:
        message = f"[{url[1]}] <a href='{entry.link}'>{entry.title}</a>"
        print(message)

        try:
            update.message.reply_text(message, parse_mode=ParseMode.HTML)
        except Unauthorized:
            # The user can no longer be messaged: flag the account inactive
            self.db.update_user(telegram_id=telegram_user.id, is_active=0)
        except TelegramError:
            # handle all other telegram related errors
            pass
def update_feed(self, url):
    """
    Sends the posts of a feed to every active subscriber and stamps the
    url as updated.

    ``url[0]`` is the feed address; ``user[6]`` is the is_active flag of
    a subscriber and ``user[0]`` the chat id.

    The feed is parsed once per call instead of once per subscriber, and
    not at all when nobody is active. A subscriber is asked to check the
    url when the feed cannot be parsed or when delivering to that
    subscriber fails; the remaining subscribers are still served.
    """
    feed_address = url[0]
    failure_notice = "Something went wrong when I tried to parse the URL: \n\n " + \
        feed_address + "\n\nCould you please check that for me? Remove the url from your subscriptions using the /remove command, it seems like it does not work anymore!"

    telegram_users = self.db.get_users_for_url(url=feed_address)
    active_users = [user for user in telegram_users if user[6]]  # is_active

    posts = None
    if active_users:
        try:
            # Materialized so the same posts can be replayed for each user
            posts = list(FeedHandler.parse_feed(feed_address))
        except Exception:
            traceback.print_exc()

    for user in active_users:
        if posts is None:
            self.bot.send_message(
                chat_id=user[0], text=failure_notice,
                parse_mode=ParseMode.HTML)
            continue

        try:
            for post in posts:
                self.send_newest_messages(url=url, post=post, user=user)
        except Exception:
            traceback.print_exc()
            self.bot.send_message(
                chat_id=user[0], text=failure_notice,
                parse_mode=ParseMode.HTML)

    self.db.update_url(url=feed_address,
                       last_updated=str(DateHandler.get_datetime_now()))
def update_feed(self, url):
    """
    Sends the posts of a feed that are newer than the last recorded one.

    Returns ``(True, url)`` when the feed was processed and
    ``(False, url, 'update_feed')`` when a TypeError or TelegramError was
    raised on the way. Returns ``None`` without doing anything when the
    ``_finished`` event is already set.

    A post counts as new when its published date is later than the stored
    last update and its link differs from the stored last url. Posts
    carrying ``daily_liturgy`` are sent with that text, and the stored
    position only moves forward when the last post of the feed was sent.
    All other posts are sent with their link and move the stored position
    forward after every successful send.
    """
    if self._finished.is_set():
        return None

    try:
        get_url_info = self.db.get_update_url(url)
        last_url = get_url_info['last_url']
        date_last_url = DateHandler.parse_datetime(
            get_url_info['last_update'])
        feed = FeedHandler.parse_feed(
            url, 4, date_last_url + timedelta(days=-1))

        for post in feed:
            # The published date is needed for every post. The old guard
            # only skipped posts that lacked both attributes, so a post
            # with daily_liturgy but no published date raised an
            # AttributeError that nothing here catches.
            if not hasattr(post, "published"):
                logger.warning('not published' + url)
                continue

            date_published = DateHandler.parse_datetime(post.published)
            is_new = date_published > date_last_url and post.link != last_url

            if hasattr(post, "daily_liturgy"):
                if is_new and post.daily_liturgy != '':
                    message = post.title + '\n' + post.daily_liturgy
                    result = self.send_newest_messages(message, url)
                    if post == feed[-1] and result:
                        self.update_url(url=url,
                                        last_update=date_published,
                                        last_url=post.link)
            elif is_new:
                message = post.title + '\n' + post.link
                result = self.send_newest_messages(message, url)
                if result:
                    self.update_url(url=url,
                                    last_update=date_published,
                                    last_url=post.link)
        return True, url
    except TypeError as e:
        logger.error(f"TypeError {url} {str(e)}")
        return False, url, 'update_feed'
    except TelegramError as e:
        logger.error(
            f"except update_feed TelegramError {url} {str(e)}")
        return False, url, 'update_feed'
def update_feed(self, url):
    """
    Delivers every post of a feed to its active users and to all of its
    channels, then stamps the url as updated.

    ``url[0]`` is the feed address and ``user[6]`` the is_active flag of
    a user row. When parsing raises a ValueError the traceback is printed
    and the url is left untouched.
    """
    subscribers = self.db.get_users_for_url(url=url[0])
    channels = self.db.get_channels_for_url(url=url[0])

    print("Processing url: {0}".format(url[0]))

    try:
        parsed_posts = FeedHandler.parse_feed(url[0])
    except ValueError:
        traceback.print_exc()
        return

    for entry in parsed_posts:
        print("Processing post: {0}".format(entry.id))

        # Users first, skipping the inactive ones
        for subscriber in (row for row in subscribers if row[6]):
            self.send_newest_messages(url=url, post=entry, user=subscriber)

        # Channels carry no active flag and always receive the post
        for channel in channels:
            self.send_newest_messages(url=url, post=entry, user=channel)

    now = DateHandler.get_datetime_now()
    self.db.update_url(url=url[0], last_updated=str(now))
def update_feed(url):
    """
    Sends the posts of a feed that are newer than the last recorded one.

    A post counts as new when its published date is later than the stored
    last update and its link differs from the stored last url. Posts
    carrying ``daily_liturgy`` are sent with that text and without page
    preview, and the stored position only moves forward when the last
    post of the feed was sent. All other posts are sent with their link
    and move the stored position forward after every successful send.

    A TypeError raised on the way is logged and swallowed. Relies on the
    module-level ``db``, ``logger``, ``send_newest_messages`` and
    ``update_url``.
    """
    try:
        get_url_info = db.get_update_url(url)
        last_url = get_url_info['last_url']
        date_last_url = DateHandler.parse_datetime(get_url_info['last_update'])
        feed = FeedHandler.parse_feed(url, 4,
                                      date_last_url + timedelta(days=-1))

        for post in feed:
            # The published date is needed for every post. The old guard
            # only skipped posts that lacked both attributes, so a post
            # with daily_liturgy but no published date raised an
            # AttributeError that nothing here catches.
            if not hasattr(post, "published"):
                logger.warning('not published' + url)
                continue

            date_published = DateHandler.parse_datetime(post.published)
            is_new = date_published > date_last_url and post.link != last_url

            if hasattr(post, "daily_liturgy"):
                if is_new and post.daily_liturgy != '':
                    message = post.title + '\n' + post.daily_liturgy
                    result = send_newest_messages(message=message,
                                                  url=url,
                                                  disable_page_preview=True)
                    if post == feed[-1] and result:
                        update_url(url=url,
                                   last_update=date_published,
                                   last_url=post.link)
            elif is_new:
                message = post.title + '\n' + post.link
                result = send_newest_messages(message=message, url=url)
                if result:
                    update_url(url=url,
                               last_update=date_published,
                               last_url=post.link)
    except TypeError as _:
        logger.error(f"TypeError {url} {str(_)}")
def test_parse_feed_amount(self):
    """parse_feed must return exactly the requested number of entries."""
    url = "https://lorem-rss.herokuapp.com/feed"
    feed = FeedHandler.parse_feed(url, 5)

    # Check the parsed feed; the old assertion checked the url literal,
    # which can never be None, so it could not fail.
    self.assertIsNotNone(feed)
    self.assertEqual(len(feed), 5)
from util.database import DatabaseHandler from util.feedhandler import FeedHandler from util.datehandler import DateHandler from cityhash import CityHash64 db = DatabaseHandler("resources/datastore.db") arg_url = 'http://yle.fi/uutiset/rss/paauutiset.rss' feed = FeedHandler.is_parsable(url=arg_url) items = {} for item in feed: hash = CityHash64(item['summary'] + item['title'] + item['link']) if (hash in items): print(item['link'], item['summary'], items[hash]) items[hash] = { 'active': True, 'last_date': DateHandler.get_datetime_now(), 'link': item['link'] } #self.db.add_url(url=arg_url, items=items) url_items = db.get_url_items(url=arg_url) for item in url_items: url_items[item]['active'] = False new_items = [] for item in feed: hash = CityHash64(item['summary'] + item['title'] + item['link']) if not (str(hash) in url_items):
def test_parse_feed(self):
    """parse_feed must return a result for the lorem-rss feed url."""
    url = "https://lorem-rss.herokuapp.com/feed"
    feed = FeedHandler.parse_feed(url)

    # Check the parsed feed; the old assertion checked the url literal,
    # which can never be None, so it could not fail.
    self.assertIsNotNone(feed)

    # NOTE(review): this reassignment is not used by anything visible
    # here; kept as is in case the test continues beyond this point.
    url = "https://lorem-rss.herokuapp.com/feed"