def setUp(self):
    """Prepare a mocked bot, the cog under test, and a sample message for every test."""
    # The object handed out by `bot.get_cog` needs an awaitable `send_log_message`.
    mod_log = MagicMock()
    mod_log.send_log_message = AsyncMock()

    self.bot = MockBot()
    self.bot.get_cog.return_value = mod_log
    self.cog = TokenRemover(bot=self.bot)

    self.msg = MockMessage(id=555, content='')
    self.msg.channel.mention = "#lemonade-stand"

    author = self.msg.author
    author.__str__ = MagicMock(return_value='lemon')
    author.bot = False
    author.avatar_url_as.return_value = 'picture-lemon.png'
    author.id = 42
    author.mention = '@lemon'
async def test_take_action(self, format_log_message, logger, mod_log_property):
    """Taking action deletes the message, notifies the channel, and sends a mod log."""
    cog = TokenRemover(self.bot)
    mod_log = mock.create_autospec(ModLog, spec_set=True, instance=True)
    token = mock.create_autospec(Token, spec_set=True, instance=True)
    log_msg = "testing123"

    # Wire the patched collaborators to the autospecced stand-ins.
    mod_log_property.return_value = mod_log
    format_log_message.return_value = log_msg

    await cog.take_action(self.msg, token)

    # The offending message is removed and the author is told why.
    self.msg.delete.assert_called_once_with()
    expected_notice = token_remover.DELETION_MESSAGE_TEMPLATE.format(
        mention=self.msg.author.mention
    )
    self.msg.channel.send.assert_called_once_with(expected_notice)

    # The formatted log message is built once, logged, and counted.
    format_log_message.assert_called_once_with(self.msg, token)
    logger.debug.assert_called_with(log_msg)
    self.bot.stats.incr.assert_called_once_with("tokens.removed_tokens")

    # The mod log ignores the deletion event and receives the alert.
    mod_log.ignore.assert_called_once_with(constants.Event.message_delete, self.msg.id)
    mod_log.send_log_message.assert_called_once_with(
        icon_url=constants.Icons.token_removed,
        colour=Colour(constants.Colours.soft_red),
        title="Token removed!",
        text=log_msg,
        thumbnail=self.msg.author.avatar_url_as.return_value,
        channel_id=constants.Channels.mod_alerts,
    )
def test_find_token_ignores_bot_messages(self, token_re):
    """Bot-authored messages yield no token and never reach the token regex."""
    self.msg.author.bot = True

    found = TokenRemover.find_token_in_message(self.msg)

    self.assertIsNone(found)
    token_re.finditer.assert_not_called()
def test_find_token_no_matches(self, token_re):
    """When the regex produces no matches, the finder returns None."""
    token_re.finditer.return_value = ()

    found = TokenRemover.find_token_in_message(self.msg)

    self.assertIsNone(found)
    token_re.finditer.assert_called_once_with(self.msg.content)
async def test_on_message_ignores_dms_bots(self, find_token_in_message):
    """DMs and bot-authored messages must never be scanned for tokens."""
    cog = TokenRemover(self.bot)
    ignored_messages = (
        MockMessage(guild=None),  # a DM has no guild
        MockMessage(author=MagicMock(bot=True)),  # authored by a bot
    )

    for ignored in ignored_messages:
        await cog.on_message(ignored)
        find_token_in_message.assert_not_called()
def setUp(self):
    """Prepare a mocked bot, the cog under test, and a sample message for every test."""
    self.bot = MockBot()
    self.cog = TokenRemover(bot=self.bot)

    message = MockMessage(id=555, content="hello world")
    message.channel.mention = "#lemonade-stand"
    # Make str(author) resolve to the mocked author's name.
    message.author.__str__ = MagicMock(return_value=message.author.name)
    message.author.avatar_url_as.return_value = "picture-lemon.png"
    self.msg = message
async def test_on_message_skips_missing_token(self, find_token_in_message, take_action):
    """Shouldn't take action if a valid token isn't found when a message is sent."""
    cog = TokenRemover(self.bot)
    # The sibling `find_token_*` tests assert that "no token" is reported as None,
    # so stub the finder with that same sentinel rather than False.
    find_token_in_message.return_value = None

    await cog.on_message(self.msg)

    find_token_in_message.assert_called_once_with(self.msg)
    take_action.assert_not_awaited()
async def test_on_message_takes_action(self, find_token_in_message, take_action):
    """A message in which a token is found must trigger `take_action` with that token."""
    found_token = "foobar"
    cog = TokenRemover(self.bot)
    find_token_in_message.return_value = found_token

    await cog.on_message(self.msg)

    find_token_in_message.assert_called_once_with(self.msg)
    take_action.assert_awaited_once_with(cog, self.msg, found_token)
def test_is_valid_user_id_valid(self):
    """User IDs that decode entirely to ASCII digits are accepted."""
    valid_ids = (
        "NDcyMjY1OTQzMDYyNDEzMzMy",
        "NDc1MDczNjI5Mzk5NTQ3OTA0",
        "NDY3MjIzMjMwNjUwNzc3NjQx",
    )

    for user_id in valid_ids:
        with self.subTest(user_id=user_id):
            self.assertTrue(TokenRemover.is_valid_user_id(user_id))
async def test_take_action_delete_failure(self, mod_log_property):
    """If deleting the token message raises NotFound, nothing is sent to the channel."""
    cog = TokenRemover(self.bot)
    mod_log_property.return_value = mock.create_autospec(ModLog, spec_set=True, instance=True)
    # Make the deletion attempt fail.
    self.msg.delete.side_effect = NotFound(MagicMock(), MagicMock())
    fake_token = mock.create_autospec(Token, spec_set=True, instance=True)

    await cog.take_action(self.msg, fake_token)

    self.msg.delete.assert_called_once_with()
    self.msg.channel.send.assert_not_awaited()
def test_is_valid_timestamp_valid(self):
    """Timestamps greater than the Discord epoch are accepted."""
    valid_timestamps = (
        "XsyRkw",
        "Xrim9Q",
        "XsyR-w",
        "XsySD_",
        "Dn9r_A",
    )

    for timestamp in valid_timestamps:
        with self.subTest(timestamp=timestamp):
            self.assertTrue(TokenRemover.is_valid_timestamp(timestamp))
def test_is_valid_timestamp_invalid(self):
    """Timestamps before the Discord epoch, or that cannot be parsed, are rejected."""
    # (encoded timestamp, description used as the sub-test label)
    cases = (
        ("B4Yffw", "DISCORD_EPOCH - TOKEN_EPOCH - 1"),
        ("ew", "123"),
        ("AoIKgA", "42076800"),
        ("{hello}[world]&(bye!)", "ASCII invalid Base64"),
        ("Þíß-ï§-ňøẗ-våłìÐ", "Unicode invalid Base64"),
    )

    for encoded, description in cases:
        with self.subTest(msg=description):
            self.assertFalse(TokenRemover.is_valid_timestamp(encoded))
def test_is_valid_user_id_invalid(self):
    """IDs that are not made purely of ASCII digits, or that cannot be decoded, are rejected."""
    # (encoded user ID, description used as the sub-test label)
    cases = (
        ("SGVsbG8gd29ybGQ", "non-digit ASCII"),
        ("0J_RgNC40LLQtdGCINC80LjRgA", "cyrillic text"),
        ("4pO14p6L4p6C4pG34p264pGl8J-EiOKSj-KCieKBsA", "Unicode digits"),
        ("4oaA4oaB4oWh4oWi4Lyz4Lyq4Lyr4LG9", "Unicode numerals"),
        ("8J2fjvCdn5nwnZ-k8J2fr_Cdn7rgravvvJngr6c", "Unicode decimals"),
        ("{hello}[world]&(bye!)", "ASCII invalid Base64"),
        ("Þíß-ï§-ňøẗ-våłìÐ", "Unicode invalid Base64"),
    )

    for encoded, description in cases:
        with self.subTest(msg=description):
            self.assertFalse(TokenRemover.is_valid_user_id(encoded))
def test_find_token_invalid_matches(self, token_re, token_cls, is_valid_id, is_valid_timestamp):
    """When no match has a valid user ID or timestamp, the finder returns None."""
    lone_match = mock.create_autospec(Match, spec_set=True, instance=True)
    token_re.finditer.return_value = [lone_match]
    token_cls.return_value = mock.create_autospec(Token, spec_set=True, instance=True)

    # Both validators reject the candidate.
    is_valid_id.return_value = False
    is_valid_timestamp.return_value = False

    found = TokenRemover.find_token_in_message(self.msg)

    self.assertIsNone(found)
    token_re.finditer.assert_called_once_with(self.msg.content)
def test_format_log_message(self, log_message):
    """The log template is formatted with message details and a fully masked HMAC."""
    expected = "Howdy"
    token = Token("NDY3MjIzMjMwNjUwNzc3NjQx", "XsySD_", "s45jqDV_Iisn-symw0yDRrk_jf4")
    log_message.format.return_value = expected

    formatted = TokenRemover.format_log_message(self.msg, token)

    self.assertEqual(formatted, expected)
    # The HMAC must be replaced by one "x" per original character.
    masked_hmac = "x" * len(token.hmac)
    log_message.format.assert_called_once_with(
        author=self.msg.author,
        author_id=self.msg.author.id,
        channel=self.msg.channel.mention,
        user_id=token.user_id,
        timestamp=token.timestamp,
        hmac=masked_hmac,
    )
def test_find_token_valid_match(self, token_re, token_cls, is_valid_id, is_valid_timestamp):
    """The finder returns the `Token` of the first match whose ID and timestamp are valid."""
    matches = [mock.create_autospec(Match, spec_set=True, instance=True) for _ in range(2)]
    tokens = [mock.create_autospec(Token, spec_set=True, instance=True) for _ in range(2)]

    token_re.finditer.return_value = matches
    token_cls.side_effect = tokens
    # The 1st match will be invalid, 2nd one valid.
    is_valid_id.side_effect = (False, True)
    is_valid_timestamp.return_value = True

    found = TokenRemover.find_token_in_message(self.msg)

    self.assertEqual(tokens[1], found)
    token_re.finditer.assert_called_once_with(self.msg.content)
async def relay_message(self, msg: Message) -> None:
    """
    Relays the message to the relevant watch channel.

    A fresh `MessageHistory` is started and a header is sent via `send_header` whenever the
    author or the channel differs from the tracked history, or the tracked message count has
    reached `BigBrotherConfig.header_message_limit`.

    The content is then prepared for relaying: if `TokenRemover.find_token_in_message` or
    `WEBHOOK_URL_RE` finds something, it is replaced by a censorship notice; otherwise every
    non-media URL in the clean content is wrapped in backticks.
    """
    limit = BigBrotherConfig.header_message_limit
    # A new header is needed when the author/channel changed or the header limit was reached.
    if (msg.author.id != self.message_history.last_author
            or msg.channel.id != self.message_history.last_channel
            or self.message_history.message_count >= limit):
        # Reset the tracked history before sending the header.
        self.message_history = MessageHistory(last_author=msg.author.id,
                                              last_channel=msg.channel.id)
        await self.send_header(msg)
    # Never relay the raw content of a message that contains a bot or webhook token.
    if TokenRemover.find_token_in_message(msg) or WEBHOOK_URL_RE.search(
            msg.content):
        cleaned_content = "Content is censored because it contains a bot or webhook token."
    elif cleaned_content := msg.clean_content:
        # Put all non-media URLs in a code block to prevent embeds
        media_urls = {
            embed.url
            for embed in msg.embeds if embed.type in ("image", "video")
        }
        for url in URL_RE.findall(cleaned_content):
            if url not in media_urls:
                cleaned_content = cleaned_content.replace(url, f"`{url}`")
    # NOTE(review): the code visible here only prepares `cleaned_content`; the step that
    # actually forwards it is not part of this excerpt - confirm against the full method.
def test_is_valid_timestamp(content: str, expected: bool):
    """Check that `TokenRemover.is_valid_timestamp` returns exactly `expected` for `content`."""
    outcome = TokenRemover.is_valid_timestamp(content)
    assert outcome is expected
class TokenRemoverTests(unittest.TestCase):
    """Unit tests covering the `TokenRemover` cog."""

    def setUp(self):
        """Prepare a mocked bot, the cog under test, and a sample message for every test."""
        # The object handed out by `bot.get_cog` needs an awaitable `send_log_message`.
        mod_log = MagicMock()
        mod_log.send_log_message = AsyncMock()

        self.bot = MockBot()
        self.bot.get_cog.return_value = mod_log
        self.cog = TokenRemover(bot=self.bot)

        self.msg = MockMessage(id=555, content='')
        self.msg.channel.mention = "#lemonade-stand"

        author = self.msg.author
        author.__str__ = MagicMock(return_value='lemon')
        author.bot = False
        author.avatar_url_as.return_value = 'picture-lemon.png'
        author.id = 42
        author.mention = '@lemon'

    def test_is_valid_user_id_is_true_for_numeric_content(self):
        """Content that decodes to digits is accepted as a user ID."""
        # MTIz = base64(123)
        is_valid = TokenRemover.is_valid_user_id('MTIz')
        self.assertTrue(is_valid)

    def test_is_valid_user_id_is_false_for_alphabetic_content(self):
        """Content that decodes to letters is rejected as a user ID."""
        # YWJj = base64(abc)
        is_valid = TokenRemover.is_valid_user_id('YWJj')
        self.assertFalse(is_valid)

    def test_is_valid_timestamp_is_true_for_valid_timestamps(self):
        """Content that decodes to a valid timestamp is accepted."""
        is_valid = TokenRemover.is_valid_timestamp('DN9r_A')
        self.assertTrue(is_valid)

    def test_is_valid_timestamp_is_false_for_invalid_values(self):
        """Content that does not decode to a valid timestamp is rejected."""
        # MTIz = base64(123)
        is_valid = TokenRemover.is_valid_timestamp('MTIz')
        self.assertFalse(is_valid)

    def test_mod_log_property(self):
        """The `mod_log` property hands back whatever the bot returns for the `ModLog` cog."""
        sentinel = 'lemon'
        self.bot.get_cog.return_value = sentinel

        self.assertEqual(self.cog.mod_log, sentinel)
        self.bot.get_cog.assert_called_once_with('ModLog')

    def test_ignores_bot_messages(self):
        """The message handler returns None for a bot-authored message."""
        self.msg.author.bot = True

        outcome = asyncio.run(self.cog.on_message(self.msg))

        self.assertIsNone(outcome)

    def test_ignores_messages_without_tokens(self):
        """The handler returns None for content with nothing that looks like a token."""
        for content in ('', 'lemon wins'):
            with self.subTest(content=content):
                self.msg.content = content
                outcome = asyncio.run(self.cog.on_message(self.msg))
                self.assertIsNone(outcome)

    def test_ignores_messages_with_invalid_tokens(self):
        """The handler returns None for token-shaped content that is not a valid token."""
        for content in ('foo.bar.baz', 'x.y.'):
            with self.subTest(content=content):
                self.msg.content = content
                outcome = asyncio.run(self.cog.on_message(self.msg))
                self.assertIsNone(outcome)

    def test_censors_valid_tokens(self):
        """A valid token gets the message deleted, the author notified, and a mod log sent."""
        cases = (
            # (content, censored_token)
            ('MTIz.DN9R_A.xyz', 'MTIz.DN9R_A.xxx'),
        )

        for content, censored_token in cases:
            with self.subTest(content=content, censored_token=censored_token):
                self.msg.content = content
                pending = self.cog.on_message(self.msg)

                with self.assertLogs(logger='bot.cogs.token_remover', level=logging.DEBUG) as captured:
                    self.assertIsNone(asyncio.run(pending))  # no return value

                # Exactly one log record is expected.
                (line,) = captured.output
                log_message = (
                    "Censored a seemingly valid token sent by "
                    "lemon (`42`) in #lemonade-stand, "
                    f"token was `{censored_token}`"
                )
                self.assertIn(log_message, line)

                # The message is removed and the author is told why.
                self.msg.delete.assert_called_once_with()
                self.msg.channel.send.assert_called_once_with(
                    DELETION_MESSAGE_TEMPLATE.format(mention='@lemon')
                )

                # The alert goes through the `ModLog` cog obtained from the bot.
                self.bot.get_cog.assert_called_with('ModLog')
                self.msg.author.avatar_url_as.assert_called_once_with(static_format='png')

                mod_log = self.bot.get_cog.return_value
                mod_log.ignore.assert_called_once_with(Event.message_delete, self.msg.id)
                mod_log.send_log_message.assert_called_once_with(
                    icon_url=Icons.token_removed,
                    colour=Colour(Colours.soft_red),
                    title="Token removed!",
                    text=log_message,
                    thumbnail='picture-lemon.png',
                    channel_id=Channels.mod_alerts,
                )
def test_is_valid_user_id(content: str, expected: bool):
    """Check that `TokenRemover.is_valid_user_id` returns exactly `expected` for `content`."""
    outcome = TokenRemover.is_valid_user_id(content)
    assert outcome is expected
def test_is_valid_timestamp_is_false_for_invalid_values(self):
    """Content that does not decode to a valid timestamp is rejected."""
    # MTIz = base64(123)
    is_valid = TokenRemover.is_valid_timestamp('MTIz')
    self.assertFalse(is_valid)
def test_is_valid_timestamp_is_true_for_valid_timestamps(self):
    """Content that decodes to a valid timestamp is accepted."""
    is_valid = TokenRemover.is_valid_timestamp('DN9r_A')
    self.assertTrue(is_valid)
def test_is_valid_user_id_is_false_for_alphabetic_content(self):
    """Content that decodes to letters is rejected as a user ID."""
    # YWJj = base64(abc)
    is_valid = TokenRemover.is_valid_user_id('YWJj')
    self.assertFalse(is_valid)
def test_is_valid_user_id_is_true_for_numeric_content(self):
    """Content that decodes to digits is accepted as a user ID."""
    # MTIz = base64(123)
    is_valid = TokenRemover.is_valid_user_id('MTIz')
    self.assertTrue(is_valid)
async def on_message(self, msg: Message) -> None:
    """
    Detect poorly formatted Python code in new messages.

    If poorly formatted code is detected, send the user a helpful message explaining how to do
    properly formatted Python syntax highlighting codeblocks.

    A message is only inspected when it was posted in a help-category channel, a channel that
    has a cooldown entry, or a whitelisted channel; was not authored by a bot; has more than
    3 lines; and `TokenRemover.find_token_in_message` finds nothing in it. Instructions are
    skipped while the channel's last recorded cooldown timestamp is less than 300 seconds old,
    unless `DEBUG_MODE` is set.
    """
    # Truthy only when the channel has a category and that category is a help category.
    is_help_channel = (getattr(msg.channel, "category", None)
                       and msg.channel.category.id in (Categories.help_available,
                                                       Categories.help_in_use))
    parse_codeblock = ((is_help_channel
                        or msg.channel.id in self.channel_cooldowns
                        or msg.channel.id in self.channel_whitelist)
                       and not msg.author.bot
                       and len(msg.content.splitlines()) > 3
                       and not TokenRemover.find_token_in_message(msg))
    if parse_codeblock:  # no token in the msg
        # `time.time()` is in seconds, so the cooldown window is 300 seconds per channel.
        on_cooldown = (time.time() -
                       self.channel_cooldowns.get(msg.channel.id, 0)) < 300
        if not on_cooldown or DEBUG_MODE:
            try:
                if self.has_bad_ticks(msg):
                    # Remember the 3 leading characters the user typed so they can be quoted back.
                    ticks = msg.content[:3]
                    # Swap the first and last 3 characters for real backticks before stripping.
                    content = self.codeblock_stripping(
                        f"```{msg.content[3:-3]}```", True)
                    if content is None:
                        return
                    content, repl_code = content
                    # NOTE(review): `content` was just rebound to the first unpacked item, so
                    # this length check applies to that item - confirm against the return
                    # shape of `codeblock_stripping`.
                    if len(content) == 2:
                        content = content[1]
                    else:
                        content = content[0]
                    # Shorten the example to at most 10 lines and/or 204 characters.
                    space_left = 204
                    if len(content) >= space_left:
                        current_length = 0
                        lines_walked = 0
                        for line in content.splitlines(keepends=True):
                            if current_length + len(
                                    line
                            ) > space_left or lines_walked == 10:
                                break
                            current_length += len(line)
                            lines_walked += 1
                        # Mark the cut with a trailing comment marker.
                        content = content[:current_length] + "#..."
                    # Backslash-prefix whatever `RE_MARKDOWN` captures for the literal example.
                    content_escaped_markdown = RE_MARKDOWN.sub(
                        r'\\\1', content)
                    howto = (
                        "It looks like you are trying to paste code into this channel.\n\n"
                        "You seem to be using the wrong symbols to indicate where the codeblock should start. "
                        f"The correct symbols would be \\`\\`\\`, not `{ticks}`.\n\n"
                        "**Here is an example of how it should look:**\n"
                        f"\\`\\`\\`python\n{content_escaped_markdown}\n\\`\\`\\`\n\n"
                        "**This will result in the following:**\n"
                        f"```python\n{content}\n```")
                else:
                    # Stays empty unless the parsed content warrants instructions.
                    howto = ""
                    content = self.codeblock_stripping(msg.content, False)
                    if content is None:
                        return
                    content, repl_code = content
                    # Attempts to parse the message into an AST node.
                    # Invalid Python code will raise a SyntaxError.
                    tree = ast.parse(content[0])
                    # Multiple lines of single words could be interpreted as expressions.
                    # This check is to avoid all nodes being parsed as expressions.
                    # (e.g. words over multiple lines)
                    if not all(
                            isinstance(node, ast.Expr)
                            for node in tree.body) or repl_code:
                        # Shorten the code to 10 lines and/or 204 characters.
                        space_left = 204
                        if content and repl_code:
                            content = content[1]
                        else:
                            content = content[0]
                        if len(content) >= space_left:
                            current_length = 0
                            lines_walked = 0
                            for line in content.splitlines(keepends=True):
                                if current_length + len(
                                        line
                                ) > space_left or lines_walked == 10:
                                    break
                                current_length += len(line)
                                lines_walked += 1
                            # Mark the cut with a trailing comment marker.
                            content = content[:current_length] + "#..."
                        # Backslash-prefix whatever `RE_MARKDOWN` captures for the literal example.
                        content_escaped_markdown = RE_MARKDOWN.sub(
                            r'\\\1', content)
                        howto += (
                            "It looks like you're trying to paste code into this channel.\n\n"
                            "Discord has support for Markdown, which allows you to post code with full "
                            "syntax highlighting. Please use these whenever you paste code, as this "
                            "helps improve the legibility and makes it easier for us to help you.\n\n"
                            f"**To do this, use the following method:**\n"
                            f"\\`\\`\\`python\n{content_escaped_markdown}\n\\`\\`\\`\n\n"
                            "**This will result in the following:**\n"
                            f"```python\n{content}\n```")
                        log.debug(
                            f"{msg.author} posted something that needed to be put inside python code "
                            "blocks. Sending the user some instructions.")
                    else:
                        log.trace(
                            "The code consists only of expressions, not sending instructions"
                        )
                if howto != "":
                    howto_embed = Embed(description=howto)
                    bot_message = await msg.channel.send(
                        f"Hey {msg.author.mention}!", embed=howto_embed)
                    # Record which bot message answers which user message.
                    self.codeblock_message_ids[msg.id] = bot_message.id
                    # Scheduled as a background task so this handler does not wait on it.
                    self.bot.loop.create_task(
                        wait_for_deletion(bot_message,
                                          user_ids=(msg.author.id, ),
                                          client=self.bot))
                else:
                    # Nothing was sent, so the cooldown below is left untouched.
                    return
                # Whitelisted channels never get a cooldown timestamp recorded.
                if msg.channel.id not in self.channel_whitelist:
                    self.channel_cooldowns[msg.channel.id] = time.time()
            except SyntaxError:
                log.trace(
                    f"{msg.author} posted in a help channel, and when we tried to parse it as Python code, "
                    "ast.parse raised a SyntaxError. This probably just means it wasn't Python code. "
                    f"The message that was posted was:\n\n{msg.content}\n\n"
                )
def token_remover():
    """Build a `TokenRemover` cog backed by a mocked bot whose cog lookup has an awaitable `send_log_message`."""
    mod_log = MagicMock()
    mod_log.send_log_message = AsyncMock()

    mocked_bot = MagicMock()
    mocked_bot.get_cog.return_value = mod_log

    return TokenRemover(bot=mocked_bot)