async def spamscan_classify(e):
    """Flag the targeted user as spam.

    Parses the ``forward`` and ``reason`` arguments from the command,
    resolves the target user, stores the hashes of their profile pictures
    and, when a SpamWatch client is configured, adds a ban for them unless
    one already exists. Progress and the final result are reported by
    editing the triggering message ``e``.
    """
    args, user = parse_arguments(e.pattern_match.group(1),
                                 ['forward', 'reason'])
    reason = args.get('reason', 'spam[gban]')
    # Forwarded-message lookup is on unless the caller turned it off.
    args['forward'] = args.get('forward', True)
    args['user'] = user

    await e.edit(CLASSIFYING_MESSAGE.format("**Fetching user information.**"))
    target = await get_user_from_event(e, **args)
    if not target:
        await e.edit("**Failed to get information for user**", delete_in=3)
        return

    flagged = target.user
    own_account = await bot.get_me()
    if flagged == own_account:
        await e.edit("**Can't flag yourself as spam**", delete_in=3)
        return

    pic_hashes = await gather_profile_pic_hashes(e, flagged)
    if pic_hashes:
        await e.edit(
            CLASSIFYING_MESSAGE.format("**Adding profile pic hashes to DB**"))
        for pic_hash in pic_hashes:
            await add_file_hash(pic_hash, 'profile pic')

    if spamwatch:
        await e.edit(CLASSIFYING_MESSAGE.format("**Checking spamwatch.**"))
        already_banned = spamwatch.get_ban(flagged.id)
        if not already_banned:
            await e.edit(CLASSIFYING_MESSAGE.format("**Adding to SpamWatch.**"))
            try:
                spamwatch.add_ban(flagged.id, reason)
            except UnauthorizedError:
                # Best effort: the user is still reported as flagged below
                # even when the SpamWatch token may not add bans.
                pass

    mention = make_mention(flagged)
    await e.edit(f"**Flagged** {mention} **as spam**\n**Reason:** {reason}")
async def fetch_info(replied_user, **kwargs):
    """Build a formatted summary of a user.

    Args:
        replied_user: Object whose ``user`` attribute is the user to
            describe and whose ``common_chats_count`` is shown in the
            general section.
        **kwargs: Display switches, all optional:
            id (default False): return only ``title -> id``.
            general (default True): include the general section.
            bot (default False): include the bot section.
            misc (default False): include the misc section.
            all (default False): force general, bot and misc on.
            mention (default False): make the title a ``tg://user`` link
                instead of bold text.

    Returns:
        A ``KeyValueItem`` when ``id`` is set, otherwise a ``Section``
        holding the selected sub-sections (unselected ones are passed as
        ``None``).
    """
    user = replied_user.user

    id_only = kwargs.get('id', False)
    show_general = kwargs.get('general', True)
    show_bot = kwargs.get('bot', False)
    show_misc = kwargs.get('misc', False)
    mention_name = kwargs.get('mention', False)
    if kwargs.get('all', False):
        show_general = show_bot = show_misc = True

    # first_name can be None just like last_name; concatenating None with a
    # str raised TypeError here. Output is unchanged when first_name is set.
    first_name = user.first_name or ''
    last_name = user.last_name or ''
    full_name = f'{first_name} {last_name}'

    if mention_name:
        title = Link(full_name, f'tg://user?id={user.id}')
    else:
        title = Bold(full_name)

    if id_only:
        return KeyValueItem(title, Code(user.id))

    general = SubSection(
        Bold('general'),
        KeyValueItem('id', Code(user.id)),
        KeyValueItem('first_name', Code(user.first_name)),
        KeyValueItem('last_name', Code(user.last_name)),
        KeyValueItem('username', Code(user.username)),
        KeyValueItem('mutual_contact', Code(user.mutual_contact)),
        KeyValueItem('common groups', Code(replied_user.common_chats_count)))

    # The ban status is only appended when a SpamWatch client is configured.
    if spamwatch:
        banobj = spamwatch.get_ban(user.id)
        if banobj:
            general.items.append(
                KeyValueItem('gbanned', f'True / {banobj.reason}'))
        else:
            general.items.append(KeyValueItem('gbanned', 'False'))

    bot = SubSection(
        Bold('bot'),
        KeyValueItem('bot', Code(user.bot)),
        KeyValueItem('bot_chat_history', Code(user.bot_chat_history)),
        KeyValueItem('bot_info_version', Code(user.bot_info_version)),
        KeyValueItem('bot_inline_geo', Code(user.bot_inline_geo)),
        KeyValueItem('bot_inline_placeholder',
                     Code(user.bot_inline_placeholder)),
        KeyValueItem('bot_nochats', Code(user.bot_nochats)))

    misc = SubSection(
        Bold('misc'),
        KeyValueItem('restricted', Code(user.restricted)),
        KeyValueItem('restriction_reason', Code(user.restriction_reason)),
        KeyValueItem('deleted', Code(user.deleted)),
        KeyValueItem('verified', Code(user.verified)),
        KeyValueItem('min', Code(user.min)),
        KeyValueItem('lang_code', Code(user.lang_code)))

    return Section(title,
                   general if show_general else None,
                   misc if show_misc else None,
                   bot if show_bot else None)
async def _collect_user_info(self, client, user, **kwargs) -> Union[Section, KeyValueItem]:
    """Build a formatted summary of ``user``.

    Args:
        client: Not used by this method.
        user: The user object to describe.
        **kwargs: Display switches, all optional:
            id (default False): return only ``title -> id``.
            general (default True): include the general section.
            bot (default False): include the bot section.
            misc (default False): include the misc section.
            all (default False): force general, bot and misc on.
            mention (default False): make the title a ``tg://user`` link
                instead of bold text.

    Returns:
        A ``KeyValueItem`` when ``id`` is set, otherwise a ``Section``
        holding the selected sub-sections (unselected ones are passed as
        ``None``).
    """
    id_only = kwargs.get('id', False)
    show_general = kwargs.get('general', True)
    show_bot = kwargs.get('bot', False)
    show_misc = kwargs.get('misc', False)
    if kwargs.get('all', False):
        show_general = show_bot = show_misc = True
    mention_name = kwargs.get('mention', False)

    full_name = await helpers.get_full_name(user)
    if mention_name:
        title = Link(full_name, f'tg://user?id={user.id}')
    else:
        title = Bold(full_name)

    if id_only:
        return KeyValueItem(title, Code(user.id))

    # Look the ban up only after the id-only early return: that path never
    # shows it, so querying SpamWatch there was wasted work.
    ban = spamwatch.get_ban(user.id) if spamwatch else None
    # Key on the ban itself rather than on its reason, so a ban with an
    # empty reason is not displayed as "gbanned False".
    if ban:
        ban_item = KeyValueItem('ban_reason', Code(ban.reason))
    else:
        ban_item = KeyValueItem('gbanned', Code('False'))

    general = SubSection(
        Bold('general'),
        KeyValueItem('id', Code(user.id)),
        KeyValueItem('first_name', Code(user.first_name)),
        KeyValueItem('last_name', Code(user.last_name)),
        KeyValueItem('username', Code(user.username)),
        KeyValueItem('mutual_contact', Code(user.mutual_contact)),
        ban_item)

    ibot = SubSection(
        Bold('bot'),
        KeyValueItem('bot', Code(user.bot)),
        KeyValueItem('bot_chat_history', Code(user.bot_chat_history)),
        KeyValueItem('bot_info_version', Code(user.bot_info_version)),
        KeyValueItem('bot_inline_geo', Code(user.bot_inline_geo)),
        KeyValueItem('bot_inline_placeholder',
                     Code(user.bot_inline_placeholder)),
        KeyValueItem('bot_nochats', Code(user.bot_nochats)))

    misc = SubSection(
        Bold('misc'),
        KeyValueItem('restricted', Code(user.restricted)),
        KeyValueItem('restriction_reason', Code(user.restriction_reason)),
        KeyValueItem('deleted', Code(user.deleted)),
        KeyValueItem('verified', Code(user.verified)),
        KeyValueItem('min', Code(user.min)),
        KeyValueItem('lang_code', Code(user.lang_code)))

    return Section(title,
                   general if show_general else None,
                   misc if show_misc else None,
                   ibot if show_bot else None)
async def score_user(event, userfull):
    """Give a user a spam score based on several factors.

    Args:
        event: Passed through to ``gather_profile_pic_hashes``.
        userfull: Full-user object; ``userfull.user`` and ``userfull.about``
            are inspected.

    Returns:
        A dict mapping a human-readable reason to the points that reason
        contributes. An empty dict means no rule matched; more (and
        higher) entries indicate a higher chance of being a spammer.
    """
    user = userfull.user

    # Every matched rule adds one ``reason -> points`` entry. A lower total
    # indicates a lower chance of being a spammer, a higher total the
    # opposite.
    score = {}

    # ``or []`` keeps ``len()`` below safe should the helper return None
    # instead of an empty collection (not confirmed from this file).
    hashes = await gather_profile_pic_hashes(event, user) or []
    total_hashes = len(hashes)
    # NOTE(review): nothing in this function ever increments this counter,
    # so the "blacklisted photos" rule below cannot fire. A lookup of
    # ``hashes`` against the stored hashes appears to be missing - confirm.
    matching_hashes = 0

    # User was flagged as a scammer
    if user.scam:
        score['flagged as scammer'] = 5

    # User is restricted
    if user.restricted:
        score['restricted'] = 3

    # No profile pic is a +2
    if total_hashes == 0:
        score['no profile pic'] = 2
    # A single profile pic can also be a red flag
    elif total_hashes == 1:
        score['single profile pic'] = 2
    # If all the profile pics are the same that's another red flag
    elif len(set(hashes)) == 1:
        score['profile pics same'] = 2

    if matching_hashes > 0:
        # If there are matching hashes that's an automatic +5
        score[f'blacklisted photos ({total_hashes}/{matching_hashes})'] = 5

    # Lots of spammers try and look normal by having a normal(ish)
    # first and last name. A first AND last name with no special
    # characters is a good indicator. This is a +1.
    plain_name = r"^[a-zA-Z0-9\s_]+$"
    if (user.first_name and re.match(plain_name, user.first_name)
            and user.last_name and re.match(plain_name, user.last_name)):
        score['alphanum first and last name'] = 1

    if user.first_name and user.last_name:
        # Another thing spammers seem to have is very predictable names.
        # These come in many forms like one uppercase name and one
        # lowercase, all upper or lower, or having one name be
        # numeric. Either way they generally have a first
        # name and a last name.
        first, last = user.first_name, user.last_name
        # The mixed-case rules must compare the first name against the
        # LAST name. Testing one name for being both upper and lower case
        # is never true, which left these two rules dead.
        if first.isupper() and last.islower():
            score['first upper last lower'] = 3
        elif first.islower() and last.isupper():
            score['first lower last upper'] = 3
        elif first.islower() and last.islower():
            # This appears less bot like than all upper
            score['lowercase name'] = 2
        elif first.isupper() and last.isupper():
            score['uppercase name'] = 3
        elif first.isnumeric() or last.isnumeric():
            score['numeric name'] = 2

    # Another popular thing is bots with japanese, chinese, cyrillic,
    # and arabic names. A match on either name is worth up to +3.
    if ((user.first_name and is_cjk(user.first_name))
            or (user.last_name and is_cjk(user.last_name))):
        score['ch/jp name'] = 3
    elif ((user.first_name and is_arabic(user.first_name))
            or (user.last_name and is_arabic(user.last_name))):
        score['arabic name'] = 3
    elif ((user.first_name and is_cyrillic(user.first_name))
            or (user.last_name and is_cyrillic(user.last_name))):
        # Cyrillic names are more common, so we'll drop the score here.
        score['cyrillic name'] = 2

    if userfull.about and is_cjk(userfull.about):
        score['ch/jp bio'] = 2
    elif userfull.about and is_arabic(userfull.about):
        score['arabic bio'] = 2
    elif userfull.about and is_cyrillic(userfull.about):
        score['cyrillic bio'] = 2

    # A username ending in numbers is a +2, and so is having none at all.
    if user.username:
        if re.match(r".*[0-9]+$", user.username):
            score['sequential username'] = 2
    else:
        score['no username'] = 2

    if userfull.about:
        # Check the bio for red flag words. Each one of these is a +3.
        # Lower-case the bio once instead of once per word.
        bio = userfull.about.lower()
        total_red_flags = sum(1 for word in REDFLAG_WORDS if word in bio)
        if total_red_flags > 0:
            score[f'red flag words x{total_red_flags}'] = total_red_flags * 3
    # No bio is also an indicator worth an extra 2 points
    else:
        score['no bio'] = 2

    # Check if this person is banned in spamwatch. This is
    # basically a guarantee, and therefore nets a +5.
    if spamwatch:
        spamwatch_ban = spamwatch.get_ban(user.id)
        if spamwatch_ban:
            # A ban without a reason must still count instead of raising
            # AttributeError on ``None.lower()``.
            ban_reason = (spamwatch_ban.reason or '').lower()
            score[f'spamwatch ({ban_reason})'] = 5

    return score