async def add(self, bot, event: Message, sess, url: str):
    """Subscribe the channel of ``event`` to the feed served at ``url``.

    The document at ``url`` is downloaded and handed to ``get_format`` to
    find a parser.  When that succeeds a new ``RSSFeedURL`` record is stored
    with the channel id, the URL, and the parsed feed's ``updated_at``
    converted to UTC, and a confirmation is sent to the channel.

    Every failure (invalid URL, connection error, empty body, no parser
    detected) is reported to the channel through ``bot.say`` and ends the
    command without storing anything.

    :param bot: bot object used to reply in the channel
    :param event: message that triggered the command
    :param sess: session object used to persist the record
                 (``begin()`` / ``add()``)
    :param url: address of the feed to subscribe to
    """
    channel = event.channel

    async with client_session() as http:
        try:
            async with http.get(url) as response:
                payload: bytes = await response.read()
        except aiohttp.client_exceptions.InvalidURL:
            await bot.say(channel, f'`{url}`은 올바른 URL이 아니에요!')
            return
        except aiohttp.client_exceptions.ClientConnectorError:
            await bot.say(channel, f'`{url}`에 접속할 수 없어요!')
            return

    # An empty body is never passed to the format detector.
    parse = get_format(payload) if payload else None
    if parse is None:
        if payload:
            complaint = f'`{url}`은 올바른 RSS 문서가 아니에요!'
        else:
            complaint = f'`{url}`은 빈 웹페이지에요!'
        await bot.say(channel, complaint)
        return

    parsed, _ = parse(payload, url)

    subscription = RSSFeedURL()
    subscription.channel = channel.id
    subscription.url = url
    subscription.updated_at = parsed.updated_at.astimezone(pytz.UTC)

    with sess.begin():
        sess.add(subscription)

    await bot.say(
        channel,
        f'<#{channel.id}> 채널에서 `{url}`을 구독하기 시작했어요!',
    )
def test_parse(input_, expected):
    """Parse one test-suite document and compare it with its expected tree.

    ``input_`` is read as bytes from ``test_suite_dir``, its format is
    detected with ``get_format`` and the resulting parser is run.  The parsed
    feed is serialized with ``write`` and compared, via ``compare_tree``,
    against the tree read from the ``expected`` file.

    A sidecar file named ``<stem>.uri.txt`` may supply the base URI handed to
    the parser; when it cannot be opened ``http://example.com/`` is used.

    :param input_: file name of the XML document, relative to
                   ``test_suite_dir``
    :param expected: file name of the expected output XML, relative to
                     ``test_suite_dir``
    """
    with open(os.path.join(test_suite_dir, input_), 'rb') as f:
        xml = f.read()
        if IRON_PYTHON:
            xml = bytes(xml)
    parse = get_format(xml)
    assert callable(parse)
    # Remove exactly one trailing ".xml".  ``str.rstrip('.xml')`` would strip
    # any trailing run of the characters ".", "x", "m" and "l" instead
    # (e.g. "html.xml" -> "ht"), so the sidecar file would never be found.
    suffix = '.xml'
    stem = input_[:-len(suffix)] if input_.endswith(suffix) else input_
    uri_filename = stem + '.uri.txt'
    try:
        with open(os.path.join(test_suite_dir, uri_filename)) as f:
            base_uri = f.read().strip()
    except (IOError, OSError):
        # No sidecar file: fall back to the default base URI.
        base_uri = 'http://example.com/'
    parsed_feed, _ = parse(xml, feed_url=base_uri)
    parsed_tree = fromstringlist(
        write(parsed_feed, canonical_order=True, hints=False)
    )
    # Pick an opener that reads the expected file as UTF-8 text where the
    # runtime's ``open`` accepts an ``encoding`` argument.
    if IRON_PYTHON:
        open_ = functools.partial(io.open, encoding='utf-8')
    elif PY3:
        open_ = functools.partial(open, encoding='utf-8')
    else:
        open_ = open
    with open_(os.path.join(test_suite_dir, expected)) as f:
        # On IronPython the whole text is passed instead of the file object.
        expected_tree = fromstringlist(f.read() if IRON_PYTHON else f)
    compare_tree(expected_tree, parsed_tree)
def test_parse(input_, expected):
    """Parse one test-suite document and compare it with its expected tree.

    ``input_`` is read as bytes from ``test_suite_dir``, its format is
    detected with ``get_format`` and the resulting parser is run.  The parsed
    feed is serialized with ``write`` and compared, via ``compare_tree``,
    against the tree read from the ``expected`` file.

    A sidecar file named ``<stem>.uri.txt`` may supply the base URI handed to
    the parser; when it cannot be opened ``http://example.com/`` is used.

    :param input_: file name of the XML document, relative to
                   ``test_suite_dir``
    :param expected: file name of the expected output XML, relative to
                     ``test_suite_dir``
    """
    with open(os.path.join(test_suite_dir, input_), 'rb') as f:
        xml = f.read()
        if IRON_PYTHON:
            xml = bytes(xml)
    parse = get_format(xml)
    assert callable(parse)
    # Remove exactly one trailing ".xml".  ``str.rstrip('.xml')`` would strip
    # any trailing run of the characters ".", "x", "m" and "l" instead
    # (e.g. "html.xml" -> "ht"), so the sidecar file would never be found.
    suffix = '.xml'
    stem = input_[:-len(suffix)] if input_.endswith(suffix) else input_
    uri_filename = stem + '.uri.txt'
    try:
        with open(os.path.join(test_suite_dir, uri_filename)) as f:
            base_uri = f.read().strip()
    except (IOError, OSError):
        # No sidecar file: fall back to the default base URI.
        base_uri = 'http://example.com/'
    parsed_feed, _ = parse(xml, feed_url=base_uri)
    parsed_tree = fromstringlist(
        write(parsed_feed, canonical_order=True, hints=False))
    # Pick the opener for the expected file.  UTF-8 is forced on IronPython
    # and on Python 3 under Windows; every other runtime uses plain ``open``.
    # NOTE(review): on Python 3 outside Windows this relies on the locale's
    # default encoding -- confirm that is intended.
    if IRON_PYTHON:
        open_ = functools.partial(io.open, encoding='utf-8')
    elif PY3 and sys.platform == 'win32':
        open_ = functools.partial(open, encoding='utf-8')
    else:
        open_ = open
    with open_(os.path.join(test_suite_dir, expected)) as f:
        # On IronPython the whole text is passed instead of the file object.
        expected_tree = fromstringlist(f.read() if IRON_PYTHON else f)
    compare_tree(expected_tree, parsed_tree)
async def crawl(bot, sess):
    """Fetch every stored feed subscription and post its new entries.

    For each ``RssFeedSub`` row the feed URL is downloaded and parsed.
    Entries whose ``updated_at`` is later than the subscription's stored
    ``updated_at`` are turned into ``Attachment`` objects and posted to the
    subscription's channel in a single message; the subscription's
    ``updated_at`` is advanced as those entries are collected and the row is
    saved afterwards.

    A connection error, an empty body, or an undetectable format is reported
    to the channel with ``bot.say`` and the subscription is skipped.

    :param bot: bot object used to talk to the chat service
    :param sess: session object used to query and persist subscriptions
    """
    subscriptions = sess.query(RssFeedSub).all()

    for sub in subscriptions:  # type: RssFeedSub
        body = ''
        async with client_session() as http:
            try:
                async with http.get(sub.url) as response:
                    body = await response.read()
            except aiohttp.client_exceptions.ClientConnectorError:
                await bot.say(
                    sub.channel,
                    f'*Error*: `{sub.url}`에 접속할 수 없어요!',
                )
                continue

        if not body:
            await bot.say(
                sub.channel,
                f'*Error*: `{sub.url}`에 접속해도 자료를 가져올 수 없어요!',
            )
            continue

        parse = get_format(body)
        if parse is None:
            await bot.say(
                sub.channel,
                f'*Error*: `{sub.url}`는 올바른 RSS 문서가 아니에요!',
            )
            continue

        parsed, _ = parse(body, sub.url)

        pending = []
        # Entries are walked in the reverse of the order the parser
        # returned them.
        for entry in reversed(parsed.entries):
            # Compare in the entry's own timezone.
            last_seen = sub.updated_at.astimezone(entry.updated_at.tzinfo)
            if last_seen < entry.updated_at:
                feed_title = str(parsed.title)
                entry_title = str(entry.title)
                link = entry.links[0].uri
                # Preview: at most the first three lines, capped at 100
                # characters.
                leading_lines = str(entry.content).split('\n')[:3]
                preview = '\n'.join(leading_lines)[:100]

                pending.append(
                    Attachment(
                        fallback=(
                            f'RSS Feed: {feed_title} - {entry_title} - {link}'
                        ),
                        title=entry_title,
                        title_link=link,
                        text=preview,
                        author_name=feed_title,
                    )
                )
                sub.updated_at = entry.updated_at

        if pending:
            await bot.api.chat.postMessage(
                channel=sub.channel,
                attachments=pending,
                as_user=True,
            )

            # Persist the advanced timestamp only after a successful post.
            with sess.begin():
                sess.add(sub)
async def rss_add(bot, event: Message, sess, url: str):
    # NOTE: the docstring below contains a ``{PREFIX}`` placeholder, so it is
    # presumably rendered at runtime as the command's help text; it is kept
    # verbatim for that reason.  In English it reads: "Subscribe to RSS in a
    # channel -- `{PREFIX}rss-add <url>` (subscribes to the given address)".
    #
    # Behaviour: download ``url``, detect a parser with ``get_format``, and
    # store a new ``RssFeedSub`` with the channel id, the URL and the parsed
    # feed's ``updated_at``.  Every failure (invalid URL, connection error,
    # empty body, no parser detected) is reported with ``bot.say`` and ends
    # the command without storing anything.
    """
    채널에서 RSS 구독

    `{PREFIX}rss-add https://item4.github.io/feed.xml` (해당 주소를 구독합니다)

    """

    channel = event.channel

    async with client_session() as http:
        try:
            async with http.get(url) as response:
                payload: bytes = await response.read()
        except aiohttp.client_exceptions.InvalidURL:
            await bot.say(channel, f'`{url}`은 올바른 URL이 아니에요!')
            return
        except aiohttp.client_exceptions.ClientConnectorError:
            await bot.say(channel, f'`{url}`에 접속할 수 없어요!')
            return

    # An empty body is never passed to the format detector.
    parse = get_format(payload) if payload else None
    if parse is None:
        if payload:
            complaint = f'`{url}`은 올바른 RSS 문서가 아니에요!'
        else:
            complaint = f'`{url}`은 빈 웹페이지에요!'
        await bot.say(channel, complaint)
        return

    parsed, _ = parse(payload, url)

    subscription = RssFeedSub()
    subscription.channel = channel.id
    subscription.url = url
    subscription.updated_at = parsed.updated_at

    with sess.begin():
        sess.add(subscription)

    await bot.say(
        channel,
        f'<#{channel.id}> 채널에서 `{url}` 을 구독하기 시작했어요!',
    )
def test_get_format(string):
    """Check which parser ``get_format`` selects for each sample document.

    The Atom and RSS samples must map to ``parse_atom`` and ``parse_rss2``
    respectively; the three blog samples must yield ``None``.

    :param string: callable applied to each sample before it is handed to
                   ``get_format``
    """
    cases = (
        (atom_xml, parse_atom),
        (rss_xml, parse_rss2),
        (atom_blog, None),
        (rss_blog, None),
        (blog_with_two_feeds, None),
    )
    for document, expected_parser in cases:
        assert get_format(string(document)) is expected_parser
pass print('There are', len(missing_inputs), 'XML files that have no paired .out.xml files:') for missing_input in sorted(missing_inputs): print('\t' + missing_input) confirm = input('Do you want to create scaffold .out.xml files? ') if not confirm.strip().lower() in ('y', 'yes'): raise SystemExit() logging.basicConfig(level=logging.DEBUG) formats = {} for filename in missing_inputs: print(filename) with open(os.path.join(test_suite_dir, filename)) as f: xml = f.read() try: parse = get_format(xml) except Exception: print('Failed to detect the format of', filename, file=sys.stderr) raise uri_filename = filename.rstrip('.xml') + '.uri.txt' try: with open(os.path.join(test_suite_dir, uri_filename)) as f: base_uri = f.read().strip() except (IOError, OSError): base_uri = 'http://example.com/' try: feed, _ = parse(xml, feed_url=base_uri) except Exception: print('Failed to parse', filename, file=sys.stderr) raise out_filename = filename.rstrip('.xml') + '.out.xml'