def make_api_call_for_site(self, site):
    """Fetch queued question IDs for `site` from the SE API and scan them for spam.

    Pops the site's queued post IDs, requests them in one batched API call,
    then runs the spam checks on each question and (if present) its answers,
    reporting via handle_spam(). Legacy Python 2 code (print statements).
    """
    posts = self.queue.pop(site)
    url = "http://api.stackexchange.com/2.2/questions/" + ";".join(str(x) for x in posts) + "?site=" + site + "&filter=!4y_-sca-)pfAwlmP_1FxC6e5yzutRIcQvonAiP&key=IAkbitmze4B8KpacUfLqkw(("
    # wait to make sure API has/updates post data
    time.sleep(60)
    try:
        response = requests.get(url, timeout=20).json()
    except requests.exceptions.Timeout:
        return  # could add some retrying logic here, but eh.
    GlobalVars.apiquota = response["quota_remaining"]
    for post in response["items"]:
        title = GlobalVars.parser.unescape(post["title"])
        body = GlobalVars.parser.unescape(post["body"])
        link = post["link"]
        # Posts by deleted users have no "owner" sub-object; fall back to blanks.
        try:
            owner_name = GlobalVars.parser.unescape(post["owner"]["display_name"])
            owner_link = post["owner"]["link"]
            owner_rep = post["owner"]["reputation"]
        except:
            owner_name = ""
            owner_link = ""
            owner_rep = 0
        q_id = str(post["question_id"])
        is_spam, reason = check_if_spam(title, body, owner_name, owner_link, site, q_id, False, False)
        # Only report low-reputation authors (<= 50 rep).
        if owner_rep <= 50 and is_spam:
            try:
                handle_spam(title, owner_name, site, link, owner_link, q_id, reason, False)
            except:
                print "NOP"
        # Answers are optional in the API response; KeyError lands in the
        # outer except below ("no answers").
        try:
            for answer in post["answers"]:
                answer_title = ""
                body = answer["body"]
                # NOTE(review): this prints the *previous* owner_name (question
                # owner or prior answer's owner) because owner_name is only
                # reassigned a few lines below — confirm whether intentional.
                print "got answer from owner with name " + owner_name
                link = answer["link"]
                a_id = str(answer["answer_id"])
                try:
                    owner_name = GlobalVars.parser.unescape(answer["owner"]["display_name"])
                    owner_link = answer["owner"]["link"]
                    owner_rep = answer["owner"]["reputation"]
                except:
                    owner_name = ""
                    owner_link = ""
                    owner_rep = 0
                is_spam, reason = check_if_spam(answer_title, body, owner_name, owner_link, site, a_id, True, False)
                if owner_rep <= 50 and is_spam:
                    try:
                        # NOTE(review): passes the enclosing question's `title`,
                        # not answer_title — presumably so the report shows the
                        # question title; verify against handle_spam's contract.
                        handle_spam(title, owner_name, site, link, owner_link, a_id, reason, True)
                    except:
                        print "NOP"
        except:
            print "no answers"
    return
def test_handle_spam_repeating_characters(cls):
    """A repeated-character title is reported verbatim in chat and is not
    replaced by the 'Potentially offensive title' mask."""
    repeated_title = 'aaaaaaaaaaaaaa'
    spam_post = cls.mock_post(title=repeated_title)
    _, detected_reasons, detected_why = check_if_spam(spam_post)
    handle_spam(post=spam_post, reasons=detected_reasons, why=detected_why)
    expected_message = Matcher(containing=repeated_title,
                               without='Potentially offensive title')
    chatcommunicate.tell_rooms.assert_called_once_with(
        expected_message, ANY, ANY, notify_site=ANY, report_data=ANY)
def test_handle_spam_offensive_title():
    """An offensive title is masked in the chat report: the message contains
    'Potentially offensive title' and never echoes the title itself."""
    # Stub out collaborators so no real chat traffic or deletion watching runs.
    GlobalVars.deletion_watcher = MagicMock()  # Mock the deletion watcher in test
    chatcommunicate.tell_rooms = MagicMock()
    offensive_title = 'f**k'
    offensive_post = mock_post(title=offensive_title)
    _, found_reasons, found_why = check_if_spam(offensive_post)
    handle_spam(offensive_post, found_reasons, found_why)
    masked_message = StringMatcher(containing='Potentially offensive title',
                                   without=offensive_title)
    chatcommunicate.tell_rooms.assert_called_once_with(
        masked_message, ANY, ANY, notify_site=ANY, report_data=ANY)
def test_handle_spam_offensive_title(cls):
    """Offensive titles produce two room messages: an unmasked one for
    'offensive-mask' rooms and a masked one for 'no-offensive-mask' rooms."""
    raw_title = 'f**k'
    mask_text = 'potentially offensive title'
    spam_post = cls.mock_post(title=raw_title)
    _, found_reasons, found_why = check_if_spam(spam_post)
    handle_spam(post=spam_post, reasons=found_reasons, why=found_why)
    # (visible text, hidden text, room tag present, room tag absent) per call,
    # in the order the calls are expected to happen.
    expected_calls = [
        call(
            Matcher(containing=shown, without=hidden),
            ANY,
            Matcher(containing=tag_in, without=tag_out),
            notify_site=ANY,
            report_data=ANY,
        )
        for shown, hidden, tag_in, tag_out in (
            (raw_title, mask_text, 'offensive-mask', 'no-offensive-mask'),
            (mask_text, raw_title, 'no-offensive-mask', 'offensive-mask'),
        )
    ]
    chatcommunicate.tell_rooms.assert_has_calls(expected_calls)
def handle_websocket_data(data):
    """Dispatch one metasmoke websocket payload to the matching handler.

    `data` is expected to be a dict-like payload whose "message" entry selects
    exactly one action branch below (chat relay, feedback, blacklisting,
    deploy/CI notifications, or emergency exit). Unknown keys are ignored.
    """
    if "message" not in data:
        return
    message = data['message']
    if not isinstance(message, Iterable):
        return
    if "message" in message:
        # Plain chat relay from metasmoke.
        chatcommunicate.tell_rooms_with("metasmoke", message['message'])
    elif "autoflag_fp" in message:
        # Autoflagging false-positive alert: notify debug rooms and the
        # affected site's rooms, excluding rooms that opted out of that site.
        event = message["autoflag_fp"]
        chatcommunicate.tell_rooms(event["message"],
                                   ("debug", "site-" + event["site"]),
                                   ("no-site-" + event["site"], ),
                                   notify_site="/autoflag_fp")
    elif "exit" in message:
        os._exit(message["exit"])
    elif "blacklist" in message:
        ids = (message['blacklist']['uid'], message['blacklist']['site'])
        datahandling.add_blacklisted_user(ids, "metasmoke", message['blacklist']['post'])
        # Remember this feedback for 60 s — presumably to suppress duplicate
        # feedback handling; confirm against datahandling's usage.
        datahandling.last_feedbacked = (ids, time.time() + 60)
    elif "unblacklist" in message:
        ids = (message['unblacklist']['uid'], message['unblacklist']['site'])
        datahandling.remove_blacklisted_user(ids)
    elif "naa" in message:
        post_site_id = parsing.fetch_post_id_and_site_from_url(
            message["naa"]["post_link"])
        datahandling.add_ignored_post(post_site_id[0:2])
    elif "fp" in message:
        post_site_id = parsing.fetch_post_id_and_site_from_url(
            message["fp"]["post_link"])
        datahandling.add_false_positive(post_site_id[0:2])
    elif "report" in message:
        import chatcommands  # Do it here (late import, avoids a module cycle at load time)
        chatcommands.report_posts([message["report"]["post_link"]],
                                  "the metasmoke API", None, "the metasmoke API")
        return
        # NOTE(review): everything below this `return` is unreachable legacy
        # code from the previous report implementation; kept byte-for-byte.
        post_data = apigetpost.api_get_post(message["report"]["post_link"])
        if post_data is None or post_data is False:
            return
        if datahandling.has_already_been_posted(post_data.site, post_data.post_id, post_data.title) \
                and not datahandling.is_false_positive((post_data.post_id, post_data.site)):
            return
        user = parsing.get_user_from_url(post_data.owner_url)
        post = classes.Post(api_response=post_data.as_dict)
        scan_spam, scan_reasons, scan_why = spamhandling.check_if_spam(post)
        if scan_spam:
            why_append = u"This post would have also been caught for: " + \
                u", ".join(scan_reasons).capitalize() + "\n" + scan_why
        else:
            why_append = u"This post would not have been caught otherwise."
        # Add user to blacklist *after* post is scanned
        if user is not None:
            datahandling.add_blacklisted_user(user, "metasmoke", post_data.post_url)
        why = u"Post manually reported by user *{}* from metasmoke.\n\n{}".format(
            message["report"]["user"], why_append)
        spamhandling.handle_spam(post=post,
                                 reasons=["Manually reported " + post_data.post_type],
                                 why=why)
    elif "deploy_updated" in message:
        return  # Disabled
        # NOTE(review): unreachable legacy auto-pull-on-deploy code below.
        sha = message["deploy_updated"]["head_commit"]["id"]
        if sha != os.popen('git log -1 --pretty="%H"').read():
            if "autopull" in message["deploy_updated"]["head_commit"][
                    "message"]:
                if only_blacklists_changed(GitManager.get_remote_diff()):
                    commit_md = "[`{0}`](https://github.com/{1}/commit/{0})" \
                        .format(sha[:7], GlobalVars.bot_repo_slug)
                    integrity = blacklist_integrity_check()
                    if len(integrity) == 0:  # No issues
                        GitManager.pull_remote()
                        findspam.reload_blacklists()
                        chatcommunicate.tell_rooms_with(
                            "debug", "No code modified in {0}, only blacklists"
                            " reloaded.".format(commit_md))
                    else:
                        integrity.append("please fix before pulling.")
                        chatcommunicate.tell_rooms_with(
                            "debug", ", ".join(integrity))
    elif "commit_status" in message:
        c = message["commit_status"]
        sha = c["commit_sha"][:7]
        # Already at this commit locally: nothing to announce or pull.
        if c["commit_sha"] == sp.check_output(
                ["git", "log", "-1", "--pretty=%H"]).decode('utf-8').strip():
            return
        if c["status"] == "success":
            if "autopull" in c["commit_message"]:
                s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/{repo}/" \
                    "commit/{commit_sha}) succeeded. Message contains 'autopull', pulling...".format(
                        ci_link=c["ci_url"], repo=GlobalVars.bot_repo_slug, commit_sha=sha)
                remote_diff = GitManager.get_remote_diff()
                # Blacklist-only and module-only changes can be hot-reloaded
                # without restarting; anything else restarts via os._exit(3).
                if only_blacklists_changed(remote_diff):
                    GitManager.pull_remote()
                    if not GlobalVars.on_master:
                        # Restart if HEAD detached
                        log('warning', "Pulling remote with HEAD detached, checkout deploy", f=True)
                        os._exit(8)
                    GlobalVars.reload()
                    findspam.FindSpam.reload_blacklists()
                    chatcommunicate.tell_rooms_with(
                        'debug', GlobalVars.s_norestart)
                elif only_modules_changed(remote_diff):
                    GitManager.pull_remote()
                    if not GlobalVars.on_master:
                        # Restart if HEAD detached
                        log('warning', "Pulling remote with HEAD detached, checkout deploy", f=True)
                        os._exit(8)
                    GlobalVars.reload()
                    reload_modules()
                    chatcommunicate.tell_rooms_with(
                        'debug', GlobalVars.s_norestart2)
                else:
                    chatcommunicate.tell_rooms_with('debug', s, notify_site="/ci")
                    os._exit(3)
            else:
                s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/{repo}/commit/{commit_sha}) " \
                    "succeeded.".format(ci_link=c["ci_url"], repo=GlobalVars.bot_repo_slug,
                                        commit_sha=sha)
                chatcommunicate.tell_rooms_with("debug", s, notify_site="/ci")
        elif c["status"] == "failure":
            s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/{repo}/commit/{commit_sha}) " \
                "failed.".format(ci_link=c["ci_url"], repo=GlobalVars.bot_repo_slug,
                                 commit_sha=sha)
            chatcommunicate.tell_rooms_with("debug", s, notify_site="/ci")
    elif "everything_is_broken" in message:
        if message["everything_is_broken"] is True:
            os._exit(6)
def handle_commands(content_lower, message_parts, ev_room, ev_room_name, ev_user_id, ev_user_name, wrap2,
                    content, message_id):
    """Parse and execute a chat command addressed to SmokeDetector.

    Two command families are handled:
      * reply commands (":<msg_id> fp/tp/ignore/why/...") that give feedback on
        one of the bot's own report messages, and
      * prefix commands ("!!/...") that manage lists, report posts, and control
        the bot process.

    Returns one of: a reply string to post in chat, None (handled silently, or
    not applicable), or a (False, str_or_None) tuple meaning "not a reply-able
    command" / error message. Privileged commands no-op for unprivileged users.

    FIX: the `!!/rmwlu` branch previously tested `uid != -1` where every
    sibling list command tests `uid > -1`; with the error sentinel uid == -2
    that made the `elif uid == -2` error branch unreachable and passed the
    error text to remove_whitelisted_user. Now consistent with the siblings.
    """
    message_url = "//chat." + wrap2.host + "/transcript/message/" + str(message_id)
    second_part_lower = "" if len(message_parts) < 2 else message_parts[1].lower()
    # Single-letter shortcuts: "f" = quiet false-positive, "k" = quiet true-positive+user.
    if second_part_lower == "f":
        second_part_lower = "fp-"
    if second_part_lower == "k":
        second_part_lower = "tpu-"
    # --- Reply commands: first token is ":<message id>" -------------------
    if re.compile("^:[0-9]+$").search(message_parts[0]):
        msg_id = int(message_parts[0][1:])
        msg = wrap2.get_message(msg_id)
        msg_content = msg.content_source
        # Trailing "-" anywhere in the verb means "don't reply in chat".
        quiet_action = ("-" in second_part_lower)
        # Only accept feedback on the bot's own messages.
        if str(msg.owner.id) != GlobalVars.smokeDetector_user_id[ev_room] or msg_content is None:
            return
        post_url = fetch_post_url_from_msg_content(msg_content)
        post_site_id = fetch_post_id_and_site_from_msg_content(msg_content)
        if post_site_id is not None:
            post_type = post_site_id[2]
        else:
            post_type = None
        # fp / false: mark report as a false positive (optionally whitelist user).
        if (second_part_lower.startswith("false") or second_part_lower.startswith("fp")) \
                and is_privileged(ev_room, ev_user_id, wrap2):
            if post_site_id is None:
                return "That message is not a report."
            t_metasmoke = Thread(target=Metasmoke.send_feedback_for_post,
                                 args=(post_url, second_part_lower, ev_user_name, ))
            t_metasmoke.start()
            add_false_positive((post_site_id[0], post_site_id[1]))
            user_added = False
            if second_part_lower.startswith("falseu") or second_part_lower.startswith("fpu"):
                url_from_msg = fetch_owner_url_from_msg_content(msg_content)
                if url_from_msg is not None:
                    user = get_user_from_url(url_from_msg)
                    if user is not None:
                        add_whitelisted_user(user)
                        user_added = True
            if post_type == "question":
                if user_added and not quiet_action:
                    return "Registered question as false positive and whitelisted user."
                elif not quiet_action:
                    return "Registered question as false positive."
            elif post_type == "answer":
                if user_added and not quiet_action:
                    return "Registered answer as false positive and whitelisted user."
                elif not quiet_action:
                    return "Registered answer as false positive."
            # Quiet action (or unknown post type): remove the report message instead.
            try:
                msg.delete()
            except:
                pass
        # tp / true: confirm report as spam (optionally blacklist the author).
        if (second_part_lower.startswith("true") or second_part_lower.startswith("tp")) \
                and is_privileged(ev_room, ev_user_id, wrap2):
            if post_site_id is None:
                return "That message is not a report."
            t_metasmoke = Thread(target=Metasmoke.send_feedback_for_post,
                                 args=(post_url, second_part_lower, ev_user_name, ))
            t_metasmoke.start()
            user_added = False
            if second_part_lower.startswith("trueu") or second_part_lower.startswith("tpu"):
                url_from_msg = fetch_owner_url_from_msg_content(msg_content)
                if url_from_msg is not None:
                    user = get_user_from_url(url_from_msg)
                    if user is not None:
                        add_blacklisted_user(user, message_url, "http:" + post_url)
                        user_added = True
            if post_type == "question":
                if not quiet_action:
                    if user_added:
                        return "Blacklisted user and registered question as true positive."
                    return "Recorded question as true positive in metasmoke. Use `tpu` or `trueu` if you want to blacklist a user."
                else:
                    return None
            elif post_type == "answer":
                if not quiet_action:
                    if user_added:
                        return "Blacklisted user."
                    return "Recorded answer as true positive in metasmoke. If you want to blacklist the poster of the answer, use `trueu` or `tpu`."
                else:
                    return None
        # ignore: suppress future alerts about this post.
        if second_part_lower.startswith("ignore") and is_privileged(ev_room, ev_user_id, wrap2):
            if post_site_id is None:
                return "That message is not a report."
            t_metasmoke = Thread(target=Metasmoke.send_feedback_for_post,
                                 args=(post_url, second_part_lower, ev_user_name, ))
            t_metasmoke.start()
            add_ignored_post(post_site_id[0:2])
            if not quiet_action:
                return "Post ignored; alerts about it will no longer be posted."
            else:
                return None
        # delete/remove/gone/poof/del: best-effort removal of the report message.
        if (second_part_lower.startswith("delete") or second_part_lower.startswith("remove")
                or second_part_lower.startswith("gone") or second_part_lower.startswith("poof")
                or second_part_lower == "del") and is_privileged(ev_room, ev_user_id, wrap2):
            try:
                msg.delete()
            except:
                pass  # couldn't delete message
        # postgone: edit the report to its "post deleted" form.
        if second_part_lower.startswith("postgone") and is_privileged(ev_room, ev_user_id, wrap2):
            edited = edited_message_after_postgone_command(msg_content)
            if edited is None:
                return "That's not a report."
            msg.edit(edited)
            return None
        # why: explain which detections caught the reported post (or user).
        if second_part_lower.startswith("why"):
            t = fetch_post_id_and_site_from_msg_content(msg_content)
            if t is None:
                # Not a post report — maybe an allspam (user) report.
                t = fetch_user_from_allspam_report(msg_content)
                if t is None:
                    return "That's not a report."
                why = get_why_allspam(t)
                if why is None or why == "":
                    return "There is no `why` data for that user (anymore)."
                else:
                    return why
            post_id, site, _ = t
            why = get_why(site, post_id)
            if why is None or why == "":
                return "There is no `why` data for that post (anymore)."
            else:
                return why
    # --- User list management commands ------------------------------------
    if content_lower.startswith("!!/addblu") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":
            add_blacklisted_user((uid, val), message_url, "")
            return "User blacklisted (`{}` on `{}`).".format(uid, val)
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return "Invalid format. Valid format: `!!/addblu profileurl` *or* `!!/addblu userid sitename`."
    if content_lower.startswith("!!/rmblu") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":
            if remove_blacklisted_user((uid, val)):
                return "User removed from blacklist (`{}` on `{}`).".format(uid, val)
            else:
                return "User is not blacklisted."
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return False, "Invalid format. Valid format: `!!/rmblu profileurl` *or* `!!/rmblu userid sitename`."
    if content_lower.startswith("!!/isblu"):
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":
            if is_blacklisted_user((uid, val)):
                return "User is blacklisted (`{}` on `{}`).".format(uid, val)
            else:
                return "User is not blacklisted (`{}` on `{}`).".format(uid, val)
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return False, "Invalid format. Valid format: `!!/isblu profileurl` *or* `!!/isblu userid sitename`."
    if content_lower.startswith("!!/addwlu") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":
            add_whitelisted_user((uid, val))
            return "User whitelisted (`{}` on `{}`).".format(uid, val)
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return False, "Invalid format. Valid format: `!!/addwlu profileurl` *or* `!!/addwlu userid sitename`."
    if content_lower.startswith("!!/rmwlu") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        uid, val = get_user_from_list_command(content_lower)
        # Was `uid != -1`, which swallowed the uid == -2 error sentinel.
        if uid > -1 and val != "":
            if remove_whitelisted_user((uid, val)):
                return "User removed from whitelist (`{}` on `{}`).".format(uid, val)
            else:
                return "User is not whitelisted."
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return False, "Invalid format. Valid format: `!!/rmwlu profileurl` *or* `!!/rmwlu userid sitename`."
    if content_lower.startswith("!!/iswlu"):
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":
            if is_whitelisted_user((uid, val)):
                return "User is whitelisted (`{}` on `{}`).".format(uid, val)
            else:
                return "User is not whitelisted (`{}` on `{}`).".format(uid, val)
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return False, "Invalid format. Valid format: `!!/iswlu profileurl` *or* `!!/iswlu userid sitename`."
    # --- Manual reporting --------------------------------------------------
    if content_lower.startswith("!!/report") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        crn, wait = can_report_now(ev_user_id, wrap2.host)
        if not crn:
            return "You can execute the !!/report command again in {} seconds. " \
                   "To avoid one user sending lots of reports in a few commands and slowing SmokeDetector down due to rate-limiting, " \
                   "you have to wait 30 seconds after you've reported multiple posts using !!/report, even if your current command just has one URL. " \
                   "(Note that this timeout won't be applied if you only used !!/report for one post)".format(wait)
        if len(message_parts) < 2:
            return False, "Not enough arguments."
        output = []
        index = 0
        urls = list(set(message_parts[1:]))  # de-duplicate URLs
        if len(urls) > 5:
            return False, "To avoid SmokeDetector reporting posts too slowly, " \
                          "you can report at most 5 posts at a time. " \
                          "This is to avoid SmokeDetector's chat messages getting rate-limited too much, " \
                          "which would slow down reports."
        for url in urls:
            index += 1
            post_data = api_get_post(url)
            if post_data is None:
                output.append("Post {}: That does not look like a valid post URL.".format(index))
                continue
            if post_data is False:
                output.append("Post {}: Could not find data for this post in the API. It may already have been deleted.".format(index))
                continue
            user = get_user_from_url(post_data.owner_url)
            if user is not None:
                add_blacklisted_user(user, message_url, post_data.post_url)
            why = u"Post manually reported by user *{}* in room *{}*.\n".format(ev_user_name,
                                                                                ev_room_name.decode('utf-8'))
            batch = ""
            if len(urls) > 1:
                batch = " (batch report: post {} out of {})".format(index, len(urls))
            handle_spam(post_data.title, post_data.body, post_data.owner_name, post_data.site,
                        post_data.post_url, post_data.owner_url, post_data.post_id,
                        ["Manually reported " + post_data.post_type + batch],
                        post_data.post_type == "answer", why, post_data.owner_rep,
                        post_data.score, post_data.up_vote_count, post_data.down_vote_count)
        # Multiple URLs and at least one successful report: start the
        # multiple-report rate-limit clock for this user.
        if 1 < len(urls) > len(output):
            add_or_update_multiple_reporter(ev_user_id, wrap2.host, time.time())
        if len(output) > 0:
            return os.linesep.join(output)
        else:
            return None
    if content_lower.startswith("!!/allspam") and is_privileged(ev_room, ev_user_id, wrap2):
        if len(message_parts) != 2:
            return False, "1 argument expected"
        url = message_parts[1]
        user = get_user_from_url(url)
        if user is None:
            return "That doesn't look like a valid user URL."
        why = u"User manually reported by *{}* in room *{}*.\n".format(ev_user_name,
                                                                       ev_room_name.decode('utf-8'))
        handle_user_with_all_spam(user, why)
    # --- Fun / informational commands --------------------------------------
    if content_lower.startswith("!!/wut"):
        return "Whaddya mean, 'wut'? Humans..."
    if content_lower.startswith("!!/lick"):
        return "*licks ice cream cone*"
    if content_lower.startswith("!!/alive"):
        if ev_room == GlobalVars.charcoal_room_id:
            return 'Of course'
        elif ev_room == GlobalVars.meta_tavern_room_id or ev_room == GlobalVars.socvr_room_id:
            return random.choice(['Yup', 'You doubt me?', 'Of course',
                                  '... did I miss something?', 'plz send teh coffee',
                                  'Watching this endless list of new questions *never* gets boring',
                                  'Kinda sorta'])
    if content_lower.startswith("!!/rev") or content_lower.startswith("!!/ver"):
        return '[' + \
            GlobalVars.commit_with_author + \
            '](https://github.com/Charcoal-SE/SmokeDetector/commit/' + \
            GlobalVars.commit + \
            ')'
    if content_lower.startswith("!!/status"):
        now = datetime.utcnow()
        diff = now - UtcDate.startup_utc_date
        minutes, remainder = divmod(diff.seconds, 60)
        minutestr = "minutes" if minutes != 1 else "minute"
        return 'Running since {} UTC ({} {})'.format(GlobalVars.startup_utc, minutes, minutestr)
    # --- Process control (each exit code is picked up by the launcher) ------
    if content_lower.startswith("!!/reboot"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            post_message_in_room(ev_room, "Goodbye, cruel world")
            os._exit(5)
    if content_lower.startswith("!!/stappit"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            post_message_in_room(ev_room, "Goodbye, cruel world")
            os._exit(6)
    if content_lower.startswith("!!/master"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            os._exit(8)
    if content_lower.startswith("!!/clearbl"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            if os.path.isfile("blacklistedUsers.txt"):
                os.remove("blacklistedUsers.txt")
                GlobalVars.blacklisted_users = []
                return "Kaboom, blacklisted users cleared."
            else:
                return "There are no blacklisted users at the moment."
    if content_lower.startswith("!!/block"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            timeToBlock = content_lower[9:].strip()
            timeToBlock = int(timeToBlock) if timeToBlock else 0
            # Cap custom block durations below 4 h; default to 15 min.
            if 0 < timeToBlock < 14400:
                GlobalVars.blockedTime = time.time() + timeToBlock
            else:
                GlobalVars.blockedTime = time.time() + 900
            GlobalVars.charcoal_hq.send_message("Reports blocked for {} seconds.".format(GlobalVars.blockedTime - time.time()))
            return "blocked"
    if content_lower.startswith("!!/unblock"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            GlobalVars.blockedTime = time.time()
            GlobalVars.charcoal_hq.send_message("Reports unblocked.")
            return "unblocked"
    if content_lower.startswith("!!/errorlogs"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            count = -1
            if len(message_parts) != 2:
                return "The !!/errorlogs command requires 1 argument."
            try:
                count = int(message_parts[1])
            except ValueError:
                pass
            if count == -1:
                return "Invalid argument."
            logs_part = fetch_lines_from_error_log(count)
            post_message_in_room(ev_room, logs_part, False)
    if content_lower.startswith("!!/pull"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            # Only pull when the latest master commit has a passing CI status.
            r = requests.get('https://api.github.com/repos/Charcoal-SE/SmokeDetector/git/refs/heads/master')
            latest_sha = r.json()["object"]["sha"]
            r = requests.get('https://api.github.com/repos/Charcoal-SE/SmokeDetector/commits/' + latest_sha + '/statuses')
            states = []
            for status in r.json():
                state = status["state"]
                states.append(state)
            if "success" in states:
                os._exit(3)
            elif "error" in states or "failure" in states:
                return "CI build failed! :( Please check your commit."
            elif "pending" in states or not states:
                return "CI build is still pending, wait until the build has finished and then pull again."
    if content_lower.startswith("!!/help") or content_lower.startswith("!!/info"):
        return "I'm [SmokeDetector](https://github.com/Charcoal-SE/SmokeDetector), a bot that detects spam and offensive posts on the network and posts alerts to chat. [A command list is available here](https://github.com/Charcoal-SE/SmokeDetector/wiki/Commands)."
    if content_lower.startswith("!!/apiquota"):
        return "The current API quota remaining is {}.".format(GlobalVars.apiquota)
    if content_lower.startswith("!!/whoami"):
        if (ev_room in GlobalVars.smokeDetector_user_id):
            return "My id for this room is {}.".format(GlobalVars.smokeDetector_user_id[ev_room])
        else:
            return "I don't know my user ID for this room. (Something is wrong, and it's apnorton's fault.)"
    if content_lower.startswith("!!/location"):
        return GlobalVars.location
    if content_lower.startswith("!!/queuestatus"):
        post_message_in_room(ev_room, GlobalVars.bodyfetcher.print_queue(), False)
    if content_lower.startswith("!!/blame"):
        GlobalVars.users_chatting[ev_room] = list(set(GlobalVars.users_chatting[ev_room]))  # Make unique
        user_to_blame = random.choice(GlobalVars.users_chatting[ev_room])
        return u"It's [{}]({})'s fault.".format(user_to_blame[0], user_to_blame[1])
    if "smokedetector" in content_lower and "fault" in content_lower \
            and ("xkcdbot" in ev_user_name.lower() or "bjb568" in ev_user_name.lower()):
        return "Liar"
    if content_lower.startswith("!!/coffee"):
        return "*brews coffee for @" + ev_user_name.replace(" ", "") + "*"
    if content_lower.startswith("!!/tea"):
        return "*brews a cup of " + random.choice(['earl grey', 'green', 'chamomile', 'lemon', 'darjeeling', 'mint']) + " tea for @" + ev_user_name.replace(" ", "") + "*"
    if content_lower.startswith("!!/brownie"):
        return "Brown!"
    if content_lower.startswith("!!/hats"):
        wb_end = datetime(2016, 1, 4, 0, 0, 0)
        now = datetime.utcnow()
        if wb_end > now:
            diff = wb_end - now
            hours, remainder = divmod(diff.seconds, 3600)
            minutes, seconds = divmod(remainder, 60)
            daystr = "days" if diff.days != 1 else "day"
            hourstr = "hours" if hours != 1 else "hour"
            minutestr = "minutes" if minutes != 1 else "minute"
            secondstr = "seconds" if seconds != 1 else "second"
            return "HURRY UP AND EARN MORE HATS! Winterbash will be over in {} {}, {} {}, {} {}, and {} {}. :(".format(diff.days, daystr, hours, hourstr, minutes, minutestr, seconds, secondstr)
        else:
            return "Winterbash is over. :("
    if content_lower.startswith("!!/test"):
        string_to_test = content[8:]
        if len(string_to_test) == 0:
            return "Nothing to test"
        result = "> "
        # Run the given text through the detectors as title, body and username.
        reasons, why = FindSpam.test_post(string_to_test, string_to_test, string_to_test, "",
                                          False, False, 1, 0)
        if len(reasons) == 0:
            result += "Would not be caught for title, body, and username."
            return result
        result += ", ".join(reasons).capitalize()
        if why is not None and len(why) > 0:
            result += "\n----------\n"
            result += why
        return result
    if content_lower.startswith("!!/amiprivileged"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            return "Yes, you are a privileged user."
        else:
            return "No, you are not a privileged user."
    # --- Notification subscriptions ----------------------------------------
    if content_lower.startswith("!!/notify"):
        if len(message_parts) != 3:
            return False, "2 arguments expected"
        user_id = int(ev_user_id)
        chat_site = wrap2.host
        room_id = message_parts[1]
        if not room_id.isdigit():
            return False, "Room ID is invalid."
        else:
            room_id = int(room_id)
        quiet_action = ("-" in message_parts[2])
        se_site = message_parts[2].replace('-', '')
        r, full_site = add_to_notification_list(user_id, chat_site, room_id, se_site)
        if r == 0:
            if not quiet_action:
                return "You'll now get pings from me if I report a post on `%s`, in room `%s` on `chat.%s`" % (full_site, room_id, chat_site)
            else:
                return None
        elif r == -1:
            return "That notification configuration is already registered."
        elif r == -2:
            return False, "The given SE site does not exist."
    if content_lower.startswith("!!/unnotify"):
        if len(message_parts) != 3:
            return False, "2 arguments expected"
        user_id = int(ev_user_id)
        chat_site = wrap2.host
        room_id = message_parts[1]
        if not room_id.isdigit():
            return False, "Room ID is invalid."
        else:
            room_id = int(room_id)
        quiet_action = ("-" in message_parts[2])
        se_site = message_parts[2].replace('-', '')
        r = remove_from_notification_list(user_id, chat_site, room_id, se_site)
        if r:
            if not quiet_action:
                return "I will no longer ping you if I report a post on `%s`, in room `%s` on `chat.%s`" % (se_site, room_id, chat_site)
            else:
                return None
        else:
            return "That configuration doesn't exist."
    if content_lower.startswith("!!/willibenotified"):
        if len(message_parts) != 3:
            return False, "2 arguments expected"
        user_id = int(ev_user_id)
        chat_site = wrap2.host
        room_id = message_parts[1]
        if not room_id.isdigit():
            return False, "Room ID is invalid"
        else:
            room_id = int(room_id)
        se_site = message_parts[2]
        will_be_notified = will_i_be_notified(user_id, chat_site, room_id, se_site)
        if will_be_notified:
            return "Yes, you will be notified for that site in that room."
        else:
            return "No, you won't be notified for that site in that room."
    if content_lower.startswith("!!/allnotificationsites"):
        if len(message_parts) != 2:
            return False, "1 argument expected"
        user_id = int(ev_user_id)
        chat_site = wrap2.host
        room_id = message_parts[1]
        if not room_id.isdigit():
            return False, "Room ID is invalid."
        sites = get_all_notification_sites(user_id, chat_site, room_id)
        if len(sites) == 0:
            return "You won't get notified for any sites in that room."
        else:
            return "You will get notified for these sites:\r\n" + ", ".join(sites)
    return False, None  # Unrecognized command, can be edited later.
def make_api_call_for_site(self, site):
    """Fetch queued posts for `site` from the SE API and scan them for spam.

    For stackoverflow.com the realtime feed misses questions, so instead of
    the queued IDs it fetches recently modified questions (tracked via
    self.last_activity_date). Also reports API quota rollovers/exhaustion to
    Charcoal HQ. Legacy Python 2 code (print statements).
    """
    posts = self.queue.pop(site)
    store_bodyfetcher_queue()  # persist the reduced queue before the slow fetch
    if site == "stackoverflow.com":
        # Not all SO questions are shown in the realtime feed. We now
        # fetch all recently modified SO questions to work around that.
        min_query = ""
        if self.last_activity_date != 0:
            min_query = "&min=" + str(self.last_activity_date)
            pagesize = "50"
        else:
            pagesize = "25"
        url = "http://api.stackexchange.com/2.2/questions?site=stackoverflow&filter=!4y_-sca-)pfAwlmP_1FxC6e5yzutRIcQvonAiP&key=IAkbitmze4B8KpacUfLqkw((&pagesize=" + pagesize + min_query
    else:
        url = "http://api.stackexchange.com/2.2/questions/" + ";".join(str(x) for x in posts) + "?site=" + site + "&filter=!4y_-sca-)pfAwlmP_1FxC6e5yzutRIcQvonAiP&key=IAkbitmze4B8KpacUfLqkw(("
    # wait to make sure API has/updates post data
    time.sleep(60)
    try:
        response = requests.get(url, timeout=20).json()
    except requests.exceptions.Timeout:
        return  # could add some retrying logic here, but eh.
    add_or_update_api_data(site)
    if "quota_remaining" in response:
        # Quota jumped up by >= 1000: daily rollover — report per-site usage.
        if response["quota_remaining"] - GlobalVars.apiquota >= 1000 and GlobalVars.apiquota >= 0:
            GlobalVars.charcoal_hq.send_message("API quota rolled over with {} requests remaining.".format(GlobalVars.apiquota))
            sorted_calls_per_site = sorted(GlobalVars.api_calls_per_site.items(), key=itemgetter(1), reverse=True)
            api_quota_used_per_site = ""
            # NOTE(review): this loop variable shadows the `site` parameter,
            # which is read again below (stackoverflow check, check_if_spam
            # calls) — after this branch runs, `site` holds the last site of
            # the usage report, not the fetched site. Confirm and fix upstream.
            for site, quota_used in sorted_calls_per_site:
                api_quota_used_per_site = api_quota_used_per_site + site.replace('.com', '').replace('.stackexchange', '') + ": " + str(quota_used) + "\n"
            api_quota_used_per_site = api_quota_used_per_site.strip()
            GlobalVars.charcoal_hq.send_message(api_quota_used_per_site, False)
            clear_api_data()
        elif response["quota_remaining"] == 0:
            GlobalVars.charcoal_hq.send_message("API reports no quota left! May be a glitch.")
        GlobalVars.apiquota = response["quota_remaining"]
    else:
        GlobalVars.charcoal_hq.send_message("The quota_remaining property was not in the API response.")
    if site == "stackoverflow.com":
        # Remember the newest activity timestamp for the next incremental fetch.
        if len(response["items"]) > 0 and "last_activity_date" in response["items"][0]:
            self.last_activity_date = response["items"][0]["last_activity_date"]
    for post in response["items"]:
        if "title" not in post or "body" not in post:
            continue
        title = GlobalVars.parser.unescape(post["title"])
        body = GlobalVars.parser.unescape(post["body"])
        link = post["link"]
        # Posts by deleted users have no "owner" sub-object; fall back to blanks.
        try:
            owner_name = GlobalVars.parser.unescape(post["owner"]["display_name"])
            owner_link = post["owner"]["link"]
            owner_rep = post["owner"]["reputation"]
        except:
            owner_name = ""
            owner_link = ""
            owner_rep = 0
        q_id = str(post["question_id"])
        is_spam, reason, why = check_if_spam(title, body, owner_name, owner_link, site, q_id, False, False, owner_rep)
        if is_spam:
            try:
                handle_spam(title, body, owner_name, site, link, owner_link, q_id, reason, False, why)
            except:
                print "NOP"
        # Answers are optional in the API response; KeyError lands in the
        # outer except below ("no answers").
        try:
            for answer in post["answers"]:
                answer_title = ""
                body = answer["body"]
                # NOTE(review): prints the *previous* owner_name — owner_name
                # is only reassigned a few lines below. Confirm if intentional.
                print "got answer from owner with name " + owner_name
                link = answer["link"]
                a_id = str(answer["answer_id"])
                try:
                    owner_name = GlobalVars.parser.unescape(answer["owner"]["display_name"])
                    owner_link = answer["owner"]["link"]
                    owner_rep = answer["owner"]["reputation"]
                except:
                    owner_name = ""
                    owner_link = ""
                    owner_rep = 0
                is_spam, reason, why = check_if_spam(answer_title, body, owner_name, owner_link, site, a_id, True, False, owner_rep)
                if is_spam:
                    try:
                        # NOTE(review): passes the question's `title` for the
                        # answer report — presumably deliberate; verify.
                        handle_spam(title, body, owner_name, site, link, owner_link, a_id, reason, True, why)
                    except:
                        print "NOP"
        except:
            print "no answers"
    return
def make_api_call_for_site(self, site):
    """Fetch queued posts for `site` from the Stack Exchange API and scan them.

    Pops the site's queued question IDs under `queue_modify_lock`, requests
    the question data (including answers), honours any API backoff that was
    previously received, tracks the remaining quota, and runs every question
    and answer through check_if_spam(), reporting hits with handle_spam().

    :param site: API site parameter, e.g. "stackoverflow.com"
    :returns: None
    """
    self.queue_modify_lock.acquire()
    if site not in self.queue:
        GlobalVars.charcoal_hq.send_message("Attempted API call to {} but there are no posts to fetch.".format(site))
        # BUGFIX: release the lock before bailing out. The original returned
        # while still holding queue_modify_lock, deadlocking every later
        # attempt to touch the queue.
        self.queue_modify_lock.release()
        return
    posts = self.queue.pop(site)
    store_bodyfetcher_queue()
    self.queue_modify_lock.release()
    question_modifier = ""
    pagesize_modifier = ""
    if site == "stackoverflow.com":
        # Not all SO questions are shown in the realtime feed. We now
        # fetch all recently modified SO questions to work around that.
        if self.last_activity_date != 0:
            pagesize = "50"
        else:
            pagesize = "25"
        pagesize_modifier = "&pagesize={pagesize}&min={time_length}".format(pagesize=pagesize, time_length=str(self.last_activity_date))
    else:
        question_modifier = "/{0}".format(";".join(str(post) for post in posts))
    url = "http://api.stackexchange.com/2.2/questions{q_modifier}?site={site}&filter=!)E0g*ODaEZ(SgULQhYvCYbu09*ss(bKFdnTrGmGUxnqPptuHP&key=IAkbitmze4B8KpacUfLqkw(({optional_min_query_param}".format(q_modifier=question_modifier, site=site, optional_min_query_param=pagesize_modifier)
    # wait to make sure API has/updates post data
    time.sleep(3)
    # Respect backoff, if we were given one
    if GlobalVars.api_backoff_time > time.time():
        time.sleep(GlobalVars.api_backoff_time - time.time() + 2)
    try:
        response = requests.get(url, timeout=20).json()
    except requests.exceptions.Timeout:
        return  # could add some retrying logic here, but eh.
    self.api_data_lock.acquire()
    add_or_update_api_data(site)
    self.api_data_lock.release()
    message_hq = ""
    if "quota_remaining" in response:
        if response["quota_remaining"] - GlobalVars.apiquota >= 5000 and GlobalVars.apiquota >= 0:
            GlobalVars.charcoal_hq.send_message("API quota rolled over with {0} requests remaining. Current quota: {1}.".format(GlobalVars.apiquota, response["quota_remaining"]))
            sorted_calls_per_site = sorted(GlobalVars.api_calls_per_site.items(), key=itemgetter(1), reverse=True)
            api_quota_used_per_site = ""
            for site_name, quota_used in sorted_calls_per_site:
                api_quota_used_per_site += site_name.replace('.com', '').replace('.stackexchange', '') + ": {0}\n".format(str(quota_used))
            api_quota_used_per_site = api_quota_used_per_site.strip()
            GlobalVars.charcoal_hq.send_message(api_quota_used_per_site, False)
            clear_api_data()
        if response["quota_remaining"] == 0:
            GlobalVars.charcoal_hq.send_message("API reports no quota left! May be a glitch.")
            GlobalVars.charcoal_hq.send_message(str(response))  # No code format for now?
        if GlobalVars.apiquota == -1:
            GlobalVars.charcoal_hq.send_message("Restart: API quota is {quota}.".format(quota=response["quota_remaining"]))
        GlobalVars.apiquota = response["quota_remaining"]
    else:
        message_hq = "The quota_remaining property was not in the API response."
    if "error_message" in response:
        message_hq += " Error: {}.".format(response["error_message"])
    if "backoff" in response:
        if GlobalVars.api_backoff_time < time.time() + response["backoff"]:
            GlobalVars.api_backoff_time = time.time() + response["backoff"]
        # Report only the endpoint path, not the full URL with the API key.
        match = regex.compile('/2.2/([^.]*)').search(url)
        url_part = match.group(1) if match else url
        message_hq += "\nBackoff received of {} seconds on request to `{}`".format(str(response["backoff"]), url_part)
    if len(message_hq) > 0:
        GlobalVars.charcoal_hq.send_message(message_hq.strip())
    if "items" not in response:
        return
    if site == "stackoverflow.com":
        # Remember the newest activity timestamp for the next SO fetch.
        items = response["items"]
        if len(items) > 0 and "last_activity_date" in items[0]:
            self.last_activity_date = items[0]["last_activity_date"]
    for post in response["items"]:
        if "title" not in post or "body" not in post:
            continue
        title = GlobalVars.parser.unescape(post["title"])
        body = GlobalVars.parser.unescape(post["body"])
        link = post["link"]
        post_score = post["score"]
        up_vote_count = post["up_vote_count"]
        down_vote_count = post["down_vote_count"]
        try:
            owner_name = GlobalVars.parser.unescape(post["owner"]["display_name"])
            owner_link = post["owner"]["link"]
            owner_rep = post["owner"]["reputation"]
        except Exception:  # was a bare except; missing/deleted owner -> blank author data
            owner_name = ""
            owner_link = ""
            owner_rep = 0
        q_id = str(post["question_id"])
        is_spam, reason, why = check_if_spam(title, body, owner_name, owner_link, site, q_id, False, False, owner_rep, post_score)
        if is_spam:
            try:
                handle_spam(title, body, owner_name, site, link, owner_link, q_id, reason, False, why, owner_rep, post_score, up_vote_count, down_vote_count, None)
            except Exception:
                print("NOP")
        try:
            for answer in post["answers"]:
                answer_title = ""
                body = answer["body"]
                print("got answer from owner with name " + owner_name)
                link = answer["link"]
                a_id = str(answer["answer_id"])
                post_score = answer["score"]
                up_vote_count = answer["up_vote_count"]
                down_vote_count = answer["down_vote_count"]
                try:
                    owner_name = GlobalVars.parser.unescape(answer["owner"]["display_name"])
                    owner_link = answer["owner"]["link"]
                    owner_rep = answer["owner"]["reputation"]
                except Exception:  # was a bare except
                    owner_name = ""
                    owner_link = ""
                    owner_rep = 0
                is_spam, reason, why = check_if_spam(answer_title, body, owner_name, owner_link, site, a_id, True, False, owner_rep, post_score)
                if is_spam:
                    try:
                        # NOTE: answer reports deliberately reuse the parent
                        # question's title; q_id links the answer back to it.
                        handle_spam(title, body, owner_name, site, link, owner_link, a_id, reason, True, why, owner_rep, post_score, up_vote_count, down_vote_count, q_id)
                    except Exception:
                        print("NOP")
        except Exception:  # question has no "answers" key (or malformed answer data)
            print("no answers")
    return
def make_api_call_for_site(self, site):
    """Fetch queued posts for `site` from the Stack Exchange API and scan them.

    Pops the site's queued question IDs, persists the remaining queue under
    `queue_store_lock`, requests the question data (including answers),
    honours any previously received API backoff, tracks the remaining quota,
    and runs every question and answer through check_if_spam(), reporting
    hits with handle_spam().

    :param site: API site parameter, e.g. "stackoverflow.com"
    :returns: None
    """
    posts = self.queue.pop(site)
    self.queue_store_lock.acquire()
    store_bodyfetcher_queue()
    self.queue_store_lock.release()
    if site == "stackoverflow.com":
        # Not all SO questions are shown in the realtime feed. We now
        # fetch all recently modified SO questions to work around that.
        min_query = ""
        if self.last_activity_date != 0:
            min_query = "&min=" + str(self.last_activity_date)
            pagesize = "50"
        else:
            pagesize = "25"
        url = "http://api.stackexchange.com/2.2/questions?site=stackoverflow&filter=!)E0g*ODaEZ(SgULQhYvCYbu09*ss(bKFdnTrGmGUxnqPptuHP&key=IAkbitmze4B8KpacUfLqkw((&pagesize=" + pagesize + min_query
    else:
        url = "http://api.stackexchange.com/2.2/questions/" + ";".join(str(x) for x in posts) + "?site=" + site + "&filter=!)E0g*ODaEZ(SgULQhYvCYbu09*ss(bKFdnTrGmGUxnqPptuHP&key=IAkbitmze4B8KpacUfLqkw(("
    # wait to make sure API has/updates post data
    time.sleep(3)
    # Respect backoff, if we were given one
    if GlobalVars.api_backoff_time > time.time():
        time.sleep(GlobalVars.api_backoff_time - time.time() + 2)
    try:
        response = requests.get(url, timeout=20).json()
    except requests.exceptions.Timeout:
        return  # could add some retrying logic here, but eh.
    self.api_data_lock.acquire()
    add_or_update_api_data(site)
    self.api_data_lock.release()
    message_hq = ""
    if "quota_remaining" in response:
        if response["quota_remaining"] - GlobalVars.apiquota >= 1000 and GlobalVars.apiquota >= 0:
            GlobalVars.charcoal_hq.send_message("API quota rolled over with {} requests remaining.".format(GlobalVars.apiquota))
            sorted_calls_per_site = sorted(GlobalVars.api_calls_per_site.items(), key=itemgetter(1), reverse=True)
            api_quota_used_per_site = ""
            # BUGFIX: loop variable renamed to `site_name` -- it previously
            # shadowed the `site` parameter, clobbering it for the rest of
            # this call (the SO check and scan calls below would then use
            # whichever site happened to be last in the quota report).
            for site_name, quota_used in sorted_calls_per_site:
                api_quota_used_per_site = api_quota_used_per_site + site_name.replace('.com', '').replace('.stackexchange', '') + ": " + str(quota_used) + "\n"
            api_quota_used_per_site = api_quota_used_per_site.strip()
            GlobalVars.charcoal_hq.send_message(api_quota_used_per_site, False)
            clear_api_data()
        if response["quota_remaining"] == 0:
            GlobalVars.charcoal_hq.send_message("API reports no quota left! May be a glitch.")
            GlobalVars.charcoal_hq.send_message(str(response))  # No code format for now?
        if GlobalVars.apiquota == -1:
            GlobalVars.charcoal_hq.send_message("Restart: API quota is {}.".format(response["quota_remaining"]))
        GlobalVars.apiquota = response["quota_remaining"]
    else:
        message_hq = "The quota_remaining property was not in the API response."
    if "error_message" in response:
        message_hq = message_hq + " Error: {}.".format(response["error_message"])
    if "backoff" in response:
        if GlobalVars.api_backoff_time < time.time() + response["backoff"]:
            GlobalVars.api_backoff_time = time.time() + response["backoff"]
        message_hq = message_hq + "\n" + "Backoff received of " + str(response["backoff"]) + " seconds."
    if len(message_hq) > 0:
        GlobalVars.charcoal_hq.send_message(message_hq.strip())
    if "items" not in response:
        return
    if site == "stackoverflow.com":
        # Remember the newest activity timestamp for the next SO fetch.
        if len(response["items"]) > 0 and "last_activity_date" in response["items"][0]:
            self.last_activity_date = response["items"][0]["last_activity_date"]
    for post in response["items"]:
        if "title" not in post or "body" not in post:
            continue
        title = GlobalVars.parser.unescape(post["title"])
        body = GlobalVars.parser.unescape(post["body"])
        link = post["link"]
        post_score = post["score"]
        up_vote_count = post["up_vote_count"]
        down_vote_count = post["down_vote_count"]
        try:
            owner_name = GlobalVars.parser.unescape(post["owner"]["display_name"])
            owner_link = post["owner"]["link"]
            owner_rep = post["owner"]["reputation"]
        except Exception:  # was a bare except; missing/deleted owner -> blank author data
            owner_name = ""
            owner_link = ""
            owner_rep = 0
        q_id = str(post["question_id"])
        is_spam, reason, why = check_if_spam(title, body, owner_name, owner_link, site, q_id, False, False, owner_rep, post_score)
        if is_spam:
            try:
                handle_spam(title, body, owner_name, site, link, owner_link, q_id, reason, False, why, owner_rep, post_score, up_vote_count, down_vote_count)
            except Exception:
                print("NOP")
        try:
            for answer in post["answers"]:
                answer_title = ""
                body = answer["body"]
                print("got answer from owner with name " + owner_name)
                link = answer["link"]
                a_id = str(answer["answer_id"])
                post_score = answer["score"]
                up_vote_count = answer["up_vote_count"]
                down_vote_count = answer["down_vote_count"]
                try:
                    owner_name = GlobalVars.parser.unescape(answer["owner"]["display_name"])
                    owner_link = answer["owner"]["link"]
                    owner_rep = answer["owner"]["reputation"]
                except Exception:  # was a bare except
                    owner_name = ""
                    owner_link = ""
                    owner_rep = 0
                is_spam, reason, why = check_if_spam(answer_title, body, owner_name, owner_link, site, a_id, True, False, owner_rep, post_score)
                if is_spam:
                    try:
                        # NOTE: answer reports deliberately reuse the parent
                        # question's title.
                        handle_spam(title, body, owner_name, site, link, owner_link, a_id, reason, True, why, owner_rep, post_score, up_vote_count, down_vote_count)
                    except Exception:
                        print("NOP")
        except Exception:  # question has no "answers" key (or malformed answer data)
            print("no answers")
    return
def make_api_call_for_site(self, site):
    """Fetch queued posts for `site` from the Stack Exchange API and scan them.

    Pops the site's queued posts (a mapping of post id -> enqueue time) under
    `queue_lock`, backfills intermediate post IDs up to the 100-post API
    cutoff, requests the question data (including answers), honours API
    backoff, tracks the remaining quota, and runs every not-recently-scanned
    question and answer through check_if_spam(), reporting hits with
    handle_spam(). Each stage is optionally mirrored to GlobalVars.flovis.

    :param site: API site parameter, e.g. "stackoverflow.com"
    :returns: None
    """
    with self.queue_lock:
        new_posts = self.queue.pop(site, None)
    if new_posts is None:
        # site was not in the queue
        return
    Tasks.do(store_bodyfetcher_queue)
    # new_posts maps post id -> enqueue time; keys may be strings, hence int()
    new_post_ids = [int(k) for k in new_posts.keys()]

    if GlobalVars.flovis is not None:
        for post_id in new_post_ids:
            GlobalVars.flovis.stage('bodyfetcher/api_request', site, post_id,
                                    {'site': site, 'posts': list(new_posts.keys())})

    # Add queue timing data
    pop_time = datetime.utcnow()
    post_add_times = [(pop_time - v).total_seconds() for k, v in new_posts.items()]
    Tasks.do(add_queue_timing_data, site, post_add_times)

    store_max_ids = False
    with self.max_ids_modify_lock:
        if site in self.previous_max_ids and max(new_post_ids) > self.previous_max_ids[site]:
            # Hybrid fetch: also pull the IDs between the previous maximum and
            # the newest queued ID, so posts missed by the realtime feed get
            # scanned too.
            previous_max_id = self.previous_max_ids[site]
            intermediate_posts = range(previous_max_id + 1, max(new_post_ids))
            # We don't want to go over the 100-post API cutoff, so take the last
            # (100-len(new_post_ids)) from intermediate_posts
            intermediate_posts = intermediate_posts[-(100 - len(new_post_ids)):]
            # new_post_ids could contain edited posts, so merge it back in
            combined = chain(intermediate_posts, new_post_ids)
            # Could be duplicates, so uniquify
            posts = list(set(combined))
        else:
            posts = new_post_ids
        new_post_ids_max = max(new_post_ids)
        if new_post_ids_max > self.previous_max_ids.get(site, 0):
            self.previous_max_ids[site] = new_post_ids_max
            store_max_ids = True
    if store_max_ids:
        # Persist outside the lock to keep the critical section short.
        schedule_store_bodyfetcher_max_ids()

    log('debug', "New IDs / Hybrid Intermediate IDs for {}:".format(site))
    if len(new_post_ids) > 30:
        log('debug', "{} +{} more".format(sorted(new_post_ids)[:30], len(new_post_ids) - 30))
    else:
        log('debug', sorted(new_post_ids))
    if len(new_post_ids) == len(posts):
        log('debug', "[ *Identical* ]")
    elif len(posts) > 30:
        log('debug', "{} +{} more".format(sorted(posts)[:30], len(posts) - 30))
    else:
        log('debug', sorted(posts))

    question_modifier = ""
    pagesize_modifier = {}
    if site == "stackoverflow.com":
        # Not all SO questions are shown in the realtime feed. We now
        # fetch all recently modified SO questions to work around that.
        with self.last_activity_date_lock:
            if self.last_activity_date != 0:
                pagesize = "100"
            else:
                pagesize = "50"
            pagesize_modifier = {
                'pagesize': pagesize,
                'min': str(self.last_activity_date - self.ACTIVITY_DATE_EXTRA_EARLIER_MS_TO_FETCH)
            }
    else:
        question_modifier = "/{0}".format(";".join([str(post) for post in posts]))

    url = "https://api.stackexchange.com/2.2/questions{}".format(question_modifier)
    params = {
        'filter': '!1rs)sUKylwB)8isvCRk.xNu71LnaxjnPS12*pX*CEOKbPFwVFdHNxiMa7GIVgzDAwMa',
        'key': 'IAkbitmze4B8KpacUfLqkw((',
        'site': site
    }
    params.update(pagesize_modifier)

    # wait to make sure API has/updates post data
    time.sleep(3)

    with GlobalVars.api_request_lock:
        # Respect backoff, if we were given one
        if GlobalVars.api_backoff_time > time.time():
            time.sleep(GlobalVars.api_backoff_time - time.time() + 2)
        try:
            time_request_made = datetime.utcnow().strftime('%H:%M:%S')
            response = requests.get(url, params=params, timeout=20).json()
        except (requests.exceptions.Timeout, requests.ConnectionError, Exception):
            # Any failure in the request being made (timeout or otherwise) should be added back to
            # the queue.
            with self.queue_lock:
                if site in self.queue:
                    self.queue[site].update(new_posts)
                else:
                    self.queue[site] = new_posts
            return

        with self.api_data_lock:
            add_or_update_api_data(site)

        message_hq = ""
        with GlobalVars.apiquota_rw_lock:
            if "quota_remaining" in response:
                quota_remaining = response["quota_remaining"]
                # Quota rollover detection: a large jump upwards (and a near-full
                # quota) means the daily allowance reset; report per-site usage.
                if quota_remaining - GlobalVars.apiquota >= 5000 and GlobalVars.apiquota >= 0 \
                        and quota_remaining > 39980:
                    tell_rooms_with("debug",
                                    "API quota rolled over with {0} requests remaining. "
                                    "Current quota: {1}.".format(GlobalVars.apiquota, quota_remaining))
                    sorted_calls_per_site = sorted(GlobalVars.api_calls_per_site.items(), key=itemgetter(1), reverse=True)
                    api_quota_used_per_site = ""
                    for site_name, quota_used in sorted_calls_per_site:
                        sanatized_site_name = site_name.replace('.com', '').replace('.stackexchange', '')
                        api_quota_used_per_site += sanatized_site_name + ": {0}\n".format(str(quota_used))
                    api_quota_used_per_site = api_quota_used_per_site.strip()
                    tell_rooms_with("debug", api_quota_used_per_site)
                    clear_api_data()
                if quota_remaining == 0:
                    tell_rooms_with("debug", "API reports no quota left! May be a glitch.")
                    tell_rooms_with("debug", str(response))  # No code format for now?
                if GlobalVars.apiquota == -1:
                    # -1 is the sentinel apiquota value before the first request
                    # after a restart.
                    tell_rooms_with("debug", "Restart: API quota is {quota}.".format(quota=quota_remaining))
                GlobalVars.apiquota = quota_remaining
            else:
                message_hq = "The quota_remaining property was not in the API response."

        if "error_message" in response:
            message_hq += " Error: {} at {} UTC.".format(response["error_message"], time_request_made)
            if "error_id" in response and response["error_id"] == 502:
                if GlobalVars.api_backoff_time < time.time() + 12:  # Add a backoff of 10 + 2 seconds as a default
                    GlobalVars.api_backoff_time = time.time() + 12
                    message_hq += " Backing off on requests for the next 12 seconds."
            message_hq += " Previous URL: `{}`".format(url)

        if "backoff" in response:
            if GlobalVars.api_backoff_time < time.time() + response["backoff"]:
                GlobalVars.api_backoff_time = time.time() + response["backoff"]

    if len(message_hq) > 0 and "site is required" not in message_hq:
        message_hq = message_hq.strip()
        if len(message_hq) > 500:
            # Force long reports onto their own line in chat.
            message_hq = "\n" + message_hq
        tell_rooms_with("debug", message_hq)

    if "items" not in response:
        return

    if site == "stackoverflow.com":
        # Remember the newest activity timestamp for the next SO fetch.
        items = response["items"]
        if len(items) > 0 and "last_activity_date" in items[0]:
            with self.last_activity_date_lock:
                self.last_activity_date = items[0]["last_activity_date"]

    num_scanned = 0
    start_time = time.time()

    for post in response["items"]:
        if GlobalVars.flovis is not None:
            # pnb: copy of the post safe to ship to flovis (body truncated,
            # answers stripped).
            pnb = copy.deepcopy(post)
            if 'body' in pnb:
                pnb['body'] = 'Present, but truncated'
            if 'answers' in pnb:
                del pnb['answers']

        if "title" not in post or "body" not in post:
            if GlobalVars.flovis is not None and 'question_id' in post:
                GlobalVars.flovis.stage('bodyfetcher/api_response/no_content', site, post['question_id'], pnb)
            continue

        post['site'] = site
        try:
            post['edited'] = (post['creation_date'] != post['last_edit_date'])
        except KeyError:
            post['edited'] = False  # last_edit_date not present = not edited

        question_doesnt_need_scan = is_post_recently_scanned_and_unchanged(post)
        add_recently_scanned_post(post)
        if not question_doesnt_need_scan:
            try:
                post_ = Post(api_response=post)
            except PostParseError as err:
                # NOTE(review): post_ is referenced here but may be unbound if
                # the very first Post() construction raises -- confirm upstream.
                log('error', 'Error {0} when parsing post: {1!r}'.format(err, post_))
                if GlobalVars.flovis is not None and 'question_id' in post:
                    GlobalVars.flovis.stage('bodyfetcher/api_response/error', site, post['question_id'], pnb)
                continue

            num_scanned += 1
            is_spam, reason, why = check_if_spam(post_)
            if is_spam:
                try:
                    if GlobalVars.flovis is not None and 'question_id' in post:
                        GlobalVars.flovis.stage('bodyfetcher/api_response/spam', site, post['question_id'],
                                                {'post': pnb, 'check_if_spam': [is_spam, reason, why]})
                    handle_spam(post=post_, reasons=reason, why=why)
                except Exception as e:
                    log('error', "Exception in handle_spam:", e)
            elif GlobalVars.flovis is not None and 'question_id' in post:
                GlobalVars.flovis.stage('bodyfetcher/api_response/not_spam', site, post['question_id'],
                                        {'post': pnb, 'check_if_spam': [is_spam, reason, why]})

        try:
            if "answers" not in post:
                pass
            else:
                for answer in post["answers"]:
                    if GlobalVars.flovis is not None:
                        # anb: flovis-safe copy of the answer (body truncated).
                        anb = copy.deepcopy(answer)
                        if 'body' in anb:
                            anb['body'] = 'Present, but truncated'

                    num_scanned += 1
                    answer["IsAnswer"] = True  # Necesssary for Post object
                    answer["title"] = ""  # Necessary for proper Post object creation
                    answer["site"] = site  # Necessary for proper Post object creation
                    try:
                        answer['edited'] = (answer['creation_date'] != answer['last_edit_date'])
                    except KeyError:
                        answer['edited'] = False  # last_edit_date not present = not edited
                    answer_doesnt_need_scan = is_post_recently_scanned_and_unchanged(answer)
                    add_recently_scanned_post(answer)
                    if answer_doesnt_need_scan:
                        continue
                    answer_ = Post(api_response=answer, parent=post_)

                    is_spam, reason, why = check_if_spam(answer_)
                    if is_spam:
                        try:
                            if GlobalVars.flovis is not None and 'answer_id' in answer:
                                GlobalVars.flovis.stage('bodyfetcher/api_response/spam', site, answer['answer_id'],
                                                        {'post': anb, 'check_if_spam': [is_spam, reason, why]})
                            handle_spam(answer_, reasons=reason, why=why)
                        except Exception as e:
                            log('error', "Exception in handle_spam:", e)
                    elif GlobalVars.flovis is not None and 'answer_id' in answer:
                        GlobalVars.flovis.stage('bodyfetcher/api_response/not_spam', site, answer['answer_id'],
                                                {'post': anb, 'check_if_spam': [is_spam, reason, why]})
        except Exception as e:
            log('error', "Exception handling answers:", e)

    end_time = time.time()
    scan_time = end_time - start_time
    GlobalVars.PostScanStat.add_stat(num_scanned, scan_time)
    return
def make_api_call_for_site(self, site):
    """Fetch queued posts for `site` from the Stack Exchange API and scan them.

    Pops the site's queued posts (post id -> enqueue time), records queue
    timing statistics, backfills intermediate post IDs up to the 100-post API
    cutoff, requests the question data (including answers), honours API
    backoff (re-queueing the posts on request failure), tracks the remaining
    quota, and runs every question and answer through check_if_spam(),
    reporting hits with handle_spam().

    :param site: API site parameter, e.g. "stackoverflow.com"
    :returns: None
    """
    if site not in self.queue:
        return

    self.queue_modify_lock.acquire()
    new_posts = self.queue.pop(site)
    store_bodyfetcher_queue()
    self.queue_modify_lock.release()

    new_post_ids = [int(k) for k, v in new_posts.items()]

    if GlobalVars.flovis is not None:
        for post_id in new_post_ids:
            GlobalVars.flovis.stage('bodyfetcher/api_request', site, post_id,
                                    {'queue': dict([[sk, [k for k, v in sq.items()]] for sk, sq in self.queue.items()]),
                                     'site': site, 'posts': [k for k, v in new_posts.items()]})

    self.queue_timing_modify_lock.acquire()
    post_add_times = [v for k, v in new_posts.items()]
    pop_time = datetime.utcnow()

    for add_time in post_add_times:
        try:
            seconds_in_queue = (pop_time - add_time).total_seconds()
            if site in self.queue_timings:
                self.queue_timings[site].append(seconds_in_queue)
            else:
                self.queue_timings[site] = [seconds_in_queue]
        except Exception:  # was a bare except
            continue  # Skip to next item if we've got invalid data or missing values.

    store_queue_timings()
    self.queue_timing_modify_lock.release()

    self.max_ids_modify_lock.acquire()
    if site in self.previous_max_ids and max(new_post_ids) > self.previous_max_ids[site]:
        previous_max_id = self.previous_max_ids[site]
        intermediate_posts = range(previous_max_id + 1, max(new_post_ids))
        # We don't want to go over the 100-post API cutoff, so take the last
        # (100-len(new_post_ids)) from intermediate_posts
        # BUGFIX: the slice must be negative -- the original
        # `[(100 - len(new_post_ids)):]` kept everything EXCEPT the first
        # (100-len) IDs, contradicting the comment and potentially exceeding
        # the 100-post cutoff while dropping the most recent backlog.
        intermediate_posts = intermediate_posts[-(100 - len(new_post_ids)):]
        # new_post_ids could contain edited posts, so merge it back in
        combined = chain(intermediate_posts, new_post_ids)
        # Could be duplicates, so uniquify
        posts = list(set(combined))
    else:
        posts = new_post_ids

    try:
        if max(new_post_ids) > self.previous_max_ids[site]:
            self.previous_max_ids[site] = max(new_post_ids)
            store_bodyfetcher_max_ids()
    except KeyError:
        # First time we've seen this site.
        self.previous_max_ids[site] = max(new_post_ids)
        store_bodyfetcher_max_ids()

    self.max_ids_modify_lock.release()

    log('debug', "New IDs / Hybrid Intermediate IDs for {0}:".format(site))
    log('debug', sorted(new_post_ids))
    log('debug', sorted(posts))

    question_modifier = ""
    pagesize_modifier = ""

    if site == "stackoverflow.com":
        # Not all SO questions are shown in the realtime feed. We now
        # fetch all recently modified SO questions to work around that.
        if self.last_activity_date != 0:
            pagesize = "50"
        else:
            pagesize = "25"
        pagesize_modifier = "&pagesize={pagesize}" \
                            "&min={time_length}".format(pagesize=pagesize, time_length=str(self.last_activity_date))
    else:
        question_modifier = "/{0}".format(";".join(str(post) for post in posts))

    url = "https://api.stackexchange.com/2.2/questions{q_modifier}?site={site}" \
          "&filter=!)E0g*ODaEZ(SgULQhYvCYbu09*ss(bKFdnTrGmGUxnqPptuHP&key=IAkbitmze4B8KpacUfLqkw((" \
          "{optional_min_query_param}".format(q_modifier=question_modifier, site=site,
                                              optional_min_query_param=pagesize_modifier)

    # wait to make sure API has/updates post data
    time.sleep(3)
    GlobalVars.api_request_lock.acquire()
    # Respect backoff, if we were given one
    if GlobalVars.api_backoff_time > time.time():
        time.sleep(GlobalVars.api_backoff_time - time.time() + 2)
    try:
        # BUGFIX: use utcnow() -- the timestamp is reported as UTC in the
        # error message below, but was previously taken from local time.
        time_request_made = datetime.utcnow().strftime('%H:%M:%S')
        response = requests.get(url, timeout=20).json()
    except (requests.exceptions.Timeout, requests.ConnectionError, Exception):
        # Any failure in the request being made (timeout or otherwise) should be added back to
        # the queue.
        self.queue_modify_lock.acquire()
        if site in self.queue:
            self.queue[site].update(new_posts)
        else:
            self.queue[site] = new_posts
        self.queue_modify_lock.release()
        GlobalVars.api_request_lock.release()
        return

    self.api_data_lock.acquire()
    add_or_update_api_data(site)
    self.api_data_lock.release()

    message_hq = ""
    if "quota_remaining" in response:
        if response["quota_remaining"] - GlobalVars.apiquota >= 5000 and GlobalVars.apiquota >= 0:
            tell_rooms_with("debug", "API quota rolled over with {0} requests remaining. "
                                     "Current quota: {1}.".format(GlobalVars.apiquota, response["quota_remaining"]))
            sorted_calls_per_site = sorted(GlobalVars.api_calls_per_site.items(), key=itemgetter(1), reverse=True)
            api_quota_used_per_site = ""
            for site_name, quota_used in sorted_calls_per_site:
                sanatized_site_name = site_name.replace('.com', '').replace('.stackexchange', '')
                api_quota_used_per_site += sanatized_site_name + ": {0}\n".format(str(quota_used))
            api_quota_used_per_site = api_quota_used_per_site.strip()
            tell_rooms_with("debug", api_quota_used_per_site)
            clear_api_data()
        if response["quota_remaining"] == 0:
            tell_rooms_with("debug", "API reports no quota left! May be a glitch.")
            tell_rooms_with("debug", str(response))  # No code format for now?
        if GlobalVars.apiquota == -1:
            # -1 is the sentinel apiquota value set at startup.
            tell_rooms_with("debug", "Restart: API quota is {quota}.".format(quota=response["quota_remaining"]))
        GlobalVars.apiquota = response["quota_remaining"]
    else:
        message_hq = "The quota_remaining property was not in the API response."

    if "error_message" in response:
        message_hq += " Error: {} at {} UTC.".format(response["error_message"], time_request_made)
        if "error_id" in response and response["error_id"] == 502:
            if GlobalVars.api_backoff_time < time.time() + 12:  # Add a backoff of 10 + 2 seconds as a default
                GlobalVars.api_backoff_time = time.time() + 12
                message_hq += " Backing off on requests for the next 12 seconds."
        message_hq += " Previous URL: `{}`".format(url)

    if "backoff" in response:
        if GlobalVars.api_backoff_time < time.time() + response["backoff"]:
            GlobalVars.api_backoff_time = time.time() + response["backoff"]

    GlobalVars.api_request_lock.release()

    if len(message_hq) > 0:
        tell_rooms_with("debug", message_hq.strip())

    if "items" not in response:
        return

    if site == "stackoverflow.com":
        # Remember the newest activity timestamp for the next SO fetch.
        items = response["items"]
        if len(items) > 0 and "last_activity_date" in items[0]:
            self.last_activity_date = items[0]["last_activity_date"]

    num_scanned = 0
    start_time = time.time()

    for post in response["items"]:
        if "title" not in post or "body" not in post:
            if GlobalVars.flovis is not None and 'question_id' in post:
                GlobalVars.flovis.stage('bodyfetcher/api_response/no_content', site, post['question_id'], post)
            continue

        post['site'] = site
        try:
            post_ = Post(api_response=post)
        except PostParseError as err:
            log('error', 'Error {0} when parsing post: {1!r}'.format(err, post_))
            if GlobalVars.flovis is not None and 'question_id' in post:
                GlobalVars.flovis.stage('bodyfetcher/api_response/error', site, post['question_id'], post)
            continue

        num_scanned += 1
        is_spam, reason, why = check_if_spam(post_)
        if is_spam:
            try:
                if GlobalVars.flovis is not None and 'question_id' in post:
                    GlobalVars.flovis.stage('bodyfetcher/api_response/spam', site, post['question_id'],
                                            {'post': post, 'check_if_spam': [is_spam, reason, why]})
                handle_spam(post=post_, reasons=reason, why=why)
            except Exception as e:
                log('error', "Exception in handle_spam:", e)
        elif GlobalVars.flovis is not None and 'question_id' in post:
            GlobalVars.flovis.stage('bodyfetcher/api_response/not_spam', site, post['question_id'],
                                    {'post': post, 'check_if_spam': [is_spam, reason, why]})

        try:
            if "answers" not in post:
                pass
            else:
                for answer in post["answers"]:
                    num_scanned += 1
                    answer["IsAnswer"] = True  # Necesssary for Post object
                    answer["title"] = ""  # Necessary for proper Post object creation
                    answer["site"] = site  # Necessary for proper Post object creation
                    answer_ = Post(api_response=answer, parent=post_)
                    is_spam, reason, why = check_if_spam(answer_)
                    if is_spam:
                        try:
                            if GlobalVars.flovis is not None and 'answer_id' in answer:
                                GlobalVars.flovis.stage('bodyfetcher/api_response/spam', site, answer['answer_id'],
                                                        {'post': answer, 'check_if_spam': [is_spam, reason, why]})
                            handle_spam(answer_, reasons=reason, why=why)
                        except Exception as e:
                            log('error', "Exception in handle_spam:", e)
                    elif GlobalVars.flovis is not None and 'answer_id' in answer:
                        GlobalVars.flovis.stage('bodyfetcher/api_response/not_spam', site, answer['answer_id'],
                                                {'post': answer, 'check_if_spam': [is_spam, reason, why]})
        except Exception as e:
            log('error', "Exception handling answers:", e)

    end_time = time.time()
    GlobalVars.posts_scan_stats_lock.acquire()
    GlobalVars.num_posts_scanned += num_scanned
    GlobalVars.post_scan_time += end_time - start_time
    GlobalVars.posts_scan_stats_lock.release()
    return
def handle_commands(content_lower, message_parts, ev_room, ev_user_id, ev_user_name, wrap2, content, message_id):
    """Dispatch one chat message to the matching bot command.

    Returns a reply string to post back to the room, or None when nothing
    matched (or when the command replies only through side effects).

    Parameters:
        content_lower  -- lower-cased message text, used for command matching
        message_parts  -- the message split into whitespace-separated tokens
        ev_room        -- id of the chat room the message arrived in
        ev_user_id     -- id of the sending user (for privilege checks)
        ev_user_name   -- display name of the sending user
        wrap2          -- chat-client wrapper (provides .host and .get_message)
        content        -- original, case-preserved message text
        message_id     -- id of this chat message (used for a transcript URL)
    """
    # Transcript link for this command message; recorded with blacklist
    # entries so the action can be traced back to the chat transcript.
    message_url = "//chat." + wrap2.host + "/transcript/message/" + str(message_id)
    second_part_lower = "" if len(message_parts) < 2 else message_parts[1].lower()

    # ---- Reply-style feedback: first token is ":<message-id>" ----------
    if re.compile("^:[0-9]+$").search(message_parts[0]):
        msg_id = int(message_parts[0][1:])
        msg = wrap2.get_message(msg_id)
        msg_content = msg.content_source
        # A trailing "-" in the feedback word (e.g. "fp-") suppresses the
        # confirmation reply.
        # NOTE(review): this indexes message_parts[1] unguarded — a reply
        # consisting of only ":<id>" would raise IndexError; confirm callers
        # always pass a second token.
        quiet_action = ("-" in message_parts[1].lower())
        # Only act on replies to this bot's own report messages.
        if str(msg.owner.id) != GlobalVars.smokeDetector_user_id[ev_room] or msg_content is None:
            return
        post_url = fetch_post_url_from_msg_content(msg_content)
        post_site_id = fetch_post_id_and_site_from_msg_content(msg_content)
        if post_site_id is not None:
            post_type = post_site_id[2]
        else:
            post_type = None

        # ---- false / fp (/ falseu / fpu): mark report as false positive
        if (second_part_lower.startswith("false") or second_part_lower.startswith("fp")) \
                and is_privileged(ev_room, ev_user_id, wrap2):
            if post_site_id is None:
                return "That message is not a report."
            # Send feedback to metasmoke asynchronously so chat handling
            # is not blocked on the HTTP round trip.
            t_metasmoke = Thread(target=Metasmoke.send_feedback_for_post,
                                 args=(post_url, second_part_lower, ev_user_name, ))
            t_metasmoke.start()
            add_false_positive((post_site_id[0], post_site_id[1]))
            user_added = False
            # "falseu"/"fpu" additionally whitelists the post's author.
            if message_parts[1].lower().startswith("falseu") or message_parts[1].lower().startswith("fpu"):
                url_from_msg = fetch_owner_url_from_msg_content(msg_content)
                if url_from_msg is not None:
                    user = get_user_from_url(url_from_msg)
                    if user is not None:
                        add_whitelisted_user(user)
                        user_added = True
            learned = False
            if post_type == "question":
                # Question titles also train the Bayesian filter.
                learned = bayesian_learn_title(fetch_title_from_msg_content(msg_content), "good")
                if learned and user_added and not quiet_action:
                    return "Registered question as false positive, whitelisted user and added title to Bayesian doctype 'good'."
                elif learned and not quiet_action:
                    return "Registered question as false positive and added title to Bayesian doctype 'good'."
                elif not learned:
                    return "Registered question as false positive, but could not add title to Bayesian doctype 'good'."
            elif post_type == "answer":
                if user_added and not quiet_action:
                    return "Registered answer as false positive and whitelisted user."
                elif not quiet_action:
                    return "Registered answer as false positive."
            # Quiet path falls through to deleting the report message;
            # best-effort, failures ignored.
            try:
                msg.delete()
            except:
                pass

        # ---- true / tp (/ trueu / tpu): confirm report as spam ---------
        if (second_part_lower.startswith("true") or second_part_lower.startswith("tp")) \
                and is_privileged(ev_room, ev_user_id, wrap2):
            if post_site_id is None:
                return "That message is not a report."
            t_metasmoke = Thread(target=Metasmoke.send_feedback_for_post,
                                 args=(post_url, second_part_lower, ev_user_name, ))
            t_metasmoke.start()
            learned = False
            user_added = False
            # "trueu"/"tpu" additionally blacklists the post's author.
            if message_parts[1].lower().startswith("trueu") or message_parts[1].lower().startswith("tpu"):
                url_from_msg = fetch_owner_url_from_msg_content(msg_content)
                if url_from_msg is not None:
                    user = get_user_from_url(url_from_msg)
                    if user is not None:
                        add_blacklisted_user(user, message_url, post_url)
                        user_added = True
            if post_type == "question":
                learned = bayesian_learn_title(fetch_title_from_msg_content(msg_content), "bad")
                if learned and user_added and not quiet_action:
                    return "Blacklisted user and registered question as true positive: added title to the Bayesian doctype 'bad'."
                elif learned and not quiet_action:
                    return "Registered question as true positive: added title to the Bayesian doctype 'bad'."
                elif not learned:
                    return "Something went wrong when registering question as true positive."
            elif post_type == "answer":
                if user_added and not quiet_action:
                    return "Blacklisted user."
                elif not user_added:
                    return "`true`/`tp` cannot be used for answers because their job is to add the title of the *question* to the Bayesian doctype 'bad'. If you want to blacklist the poster of the answer, use `trueu` or `tpu`."

        # ---- ignore: stop re-reporting this post -----------------------
        if second_part_lower.startswith("ignore") and is_privileged(ev_room, ev_user_id, wrap2):
            if post_site_id is None:
                return "That message is not a report."
            add_ignored_post(post_site_id[0:2])
            if not quiet_action:
                return "Post ignored; alerts about it will no longer be posted."

        # ---- delete/remove/gone/poof/del: delete the report message ----
        if (second_part_lower.startswith("delete") or second_part_lower.startswith("remove")
                or second_part_lower.startswith("gone") or second_part_lower.startswith("poof")
                or second_part_lower == "del") and is_privileged(ev_room, ev_user_id, wrap2):
            try:
                msg.delete()
            except:
                pass  # couldn't delete message

        # ---- why: show the detection reasons recorded for the post -----
        if second_part_lower.startswith("why"):
            t = fetch_post_id_and_site_from_msg_content(msg_content)
            if t is None:
                return "That's not a report."
            post_id, site, _ = t
            why = get_why(site, post_id)
            if why is None or why == "":
                return "There is no `why` data for that post (anymore)."
            else:
                return why

    # ---- !!/ prefixed commands (not tied to a reply) -------------------
    if content_lower.startswith("!!/addblu") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        # get_user_from_list_command returns (uid, site); uid -2 signals a
        # parse error with the message in val, uid -1 signals bad format.
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":
            add_blacklisted_user((uid, val), message_url, "")
            return "User blacklisted (`{}` on `{}`).".format(uid, val)
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return "Invalid format. Valid format: `!!/addblu profileurl` *or* `!!/addblu userid sitename`."
    if content_lower.startswith("!!/rmblu") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":
            if remove_blacklisted_user((uid, val)):
                return "User removed from blacklist (`{}` on `{}`).".format(uid, val)
            else:
                return "User is not blacklisted."
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return "Invalid format. Valid format: `!!/rmblu profileurl` *or* `!!/rmblu userid sitename`."
    if content_lower.startswith("!!/isblu"):
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":
            if is_blacklisted_user((uid, val)):
                return "User is blacklisted. (`{}` on `{}`).".format(uid, val)
            else:
                return "User is not blacklisted. (`{}` on `{}`).".format(uid, val)
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return "Invalid format. Valid format: `!!/isblu profileurl` *or* `!!/isblu userid sitename`."
    if content_lower.startswith("!!/addwlu") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":
            add_whitelisted_user((uid, val))
            return "User whitelisted (`{}` on `{}`).".format(uid, val)
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return "Invalid format. Valid format: `!!/addwlu profileurl` *or* `!!/addwlu userid sitename`."
    if content_lower.startswith("!!/rmwlu") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        uid, val = get_user_from_list_command(content_lower)
        # NOTE(review): this uses `uid != -1` (unlike the `uid > -1` checks
        # above), so the `uid == -2` error branch below is unreachable —
        # confirm whether that is intentional.
        if uid != -1 and val != "":
            if remove_whitelisted_user((uid, val)):
                return "User removed from whitelist (`{}` on `{}`).".format(uid, val)
            else:
                return "User is not whitelisted."
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return "Invalid format. Valid format: `!!/rmwlu profileurl` *or* `!!/rmwlu userid sitename`."
    if content_lower.startswith("!!/iswlu"):
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":
            if is_whitelisted_user((uid, val)):
                return "User is whitelisted. (`{}` on `{}`).".format(uid, val)
            else:
                return "User is not whitelisted. (`{}` on `{}`).".format(uid, val)
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return "Invalid format. Valid format: `!!/iswlu profileurl` *or* `!!/iswlu userid sitename`."

    # ---- !!/report: manually report a post by URL ----------------------
    if content_lower.startswith("!!/report") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        if len(message_parts) < 2:
            return "Not enough arguments."
        url = message_parts[1]
        # api_get_post: None = not a recognizable post URL,
        # False = post not found via the API (e.g. deleted).
        post_data = api_get_post(url)
        if post_data is None:
            return "That does not look like a valid post URL."
        if post_data is False:
            return "Could not find data for this post in the API. Check whether the post is not deleted yet."
        user = get_user_from_url(post_data.owner_url)
        if user is not None:
            add_blacklisted_user(user, message_url, post_data.post_url)
        bayesian_learn_title(post_data.title, "bad")
        handle_spam(post_data.title, post_data.body, post_data.owner_name, post_data.site,
                    post_data.post_url, post_data.owner_url, post_data.post_id,
                    ["Manually reported " + post_data.post_type], post_data.post_type == "answer")

    # ---- misc / fun commands ------------------------------------------
    if content_lower.startswith("!!/wut"):
        return "Whaddya mean, 'wut'? Humans..."
    if content_lower.startswith("!!/lick"):
        return "*licks ice cream cone*"
    if content_lower.startswith("!!/alive"):
        if ev_room == GlobalVars.charcoal_room_id:
            return 'Of course'
        elif ev_room == GlobalVars.meta_tavern_room_id or ev_room == GlobalVars.socvr_room_id:
            return random.choice(['Yup', 'You doubt me?', 'Of course', '... did I miss something?',
                                  'plz send teh coffee',
                                  'Watching this endless list of new questions *never* gets boring',
                                  'Kinda sorta'])
    if content_lower.startswith("!!/rev"):
        # Markdown link to the currently running commit.
        return '[' + \
            GlobalVars.commit_with_author + \
            '](https://github.com/Charcoal-SE/SmokeDetector/commit/' + \
            GlobalVars.commit + \
            ')'
    if content_lower.startswith("!!/status"):
        now = datetime.utcnow()
        diff = now - UtcDate.startup_utc_date
        minutes, remainder = divmod(diff.seconds, 60)
        minutestr = "minutes" if minutes != 1 else "minute"
        return 'Running since {} UTC ({} {})'.format(GlobalVars.startup_utc, minutes, minutestr)

    # ---- process control (exit codes tell the launcher what to do) ----
    if content_lower.startswith("!!/reboot"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            post_message_in_room(ev_room, "Goodbye, cruel world")
            os._exit(5)
    if content_lower.startswith("!!/stappit"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            post_message_in_room(ev_room, "Goodbye, cruel world")
            os._exit(6)
    if content_lower.startswith("!!/master"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            os._exit(8)
    if content_lower.startswith("!!/clearbl"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            if os.path.isfile("blacklistedUsers.txt"):
                os.remove("blacklistedUsers.txt")
                GlobalVars.blacklisted_users = []
                return "Kaboom, blacklisted users cleared."
            else:
                return "There are no blacklisted users at the moment."
    if content_lower.startswith("!!/block"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            timeToBlock = content_lower[9:].strip()
            # NOTE(review): int() raises ValueError on a non-numeric
            # argument (e.g. "!!/block abc") — confirm that is acceptable.
            timeToBlock = int(timeToBlock) if timeToBlock else 0
            if 0 < timeToBlock < 14400:
                GlobalVars.blockedTime = time.time() + timeToBlock
            else:
                GlobalVars.blockedTime = time.time() + 900
            return "blocked"
    if content_lower.startswith("!!/unblock"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            GlobalVars.blockedTime = time.time()
            return "unblocked"
    if content_lower.startswith("!!/errorlogs"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            count = -1
            if len(message_parts) != 2:
                return "The !!/errorlogs command requires 1 argument."
            try:
                count = int(message_parts[1])
            except ValueError:
                pass
            if count == -1:
                return "Invalid argument."
            logs_part = fetch_lines_from_error_log(count)
            post_message_in_room(ev_room, logs_part, False)
    if content_lower.startswith("!!/pull"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            # Only pull when the latest master commit has a CI result.
            r = requests.get('https://api.github.com/repos/Charcoal-SE/SmokeDetector/git/refs/heads/master')
            latest_sha = r.json()["object"]["sha"]
            r = requests.get('https://api.github.com/repos/Charcoal-SE/SmokeDetector/commits/'
                             + latest_sha + '/statuses')
            states = []
            for status in r.json():
                state = status["state"]
                states.append(state)
            if "success" in states:
                # Exit code 3 = launcher should pull and restart.
                os._exit(3)
            elif "error" in states or "failure" in states:
                return "CI build failed! :( Please check your commit."
            elif "pending" in states or not states:
                return "CI build is still pending, wait until the build has finished and then pull again."
    if content_lower.startswith("!!/help"):
        return "I'm [SmokeDetector](https://github.com/Charcoal-SE/SmokeDetector), a bot that detects spam and low-quality posts on the network and posts alerts to chat. [A command list is available here](https://github.com/Charcoal-SE/SmokeDetector/wiki/Commands)."
    if content_lower.startswith("!!/apiquota"):
        return GlobalVars.apiquota
    if content_lower.startswith("!!/whoami"):
        if (ev_room in GlobalVars.smokeDetector_user_id):
            return "My id for this room is {}".format(GlobalVars.smokeDetector_user_id[ev_room])
        else:
            return "I don't know my user ID for this room. (Something is wrong, and it's apnorton's fault.)"
    if content_lower.startswith("!!/location"):
        return GlobalVars.location
    if content_lower.startswith("!!/queuestatus"):
        post_message_in_room(ev_room, GlobalVars.bodyfetcher.print_queue(), False)
    if content_lower.startswith("!!/blame") and (ev_room == GlobalVars.meta_tavern_room_id
                                                 or ev_room == GlobalVars.socvr_room_id):
        GlobalVars.tavern_users_chatting = list(set(GlobalVars.tavern_users_chatting))  # Make unique
        user_to_blame = random.choice(GlobalVars.tavern_users_chatting)
        return "It's " + user_to_blame + "'s fault."
    if "smokedetector" in content_lower and "fault" in content_lower \
            and ("xkcdbot" in ev_user_name.lower() or "bjb568" in ev_user_name.lower()):
        return "Liar"
    if content_lower.startswith("!!/coffee"):
        return "*brews coffee for @" + ev_user_name.replace(" ", "") + "*"
    if content_lower.startswith("!!/tea"):
        return "*brews a cup of " + random.choice(['earl grey', 'green', 'chamomile', 'lemon',
                                                   'darjeeling', 'mint']) + " tea for @" + ev_user_name.replace(" ", "") + "*"
    if content_lower.startswith("!!/brownie"):
        return "Brown!"
    # ---- !!/test: run the spam rules against arbitrary text ------------
    if content_lower.startswith("!!/test"):
        string_to_test = content[8:]
        if len(string_to_test) == 0:
            return "Nothing to test"
        result = "> "
        # The same string is checked as title, body and username.
        reasons, why = FindSpam.test_post(string_to_test, string_to_test, string_to_test, "", False, False)
        if len(reasons) == 0:
            result += "Would not be caught for title, body and username."
            return result
        result += ", ".join(reasons).capitalize()
        if why is not None and len(why) > 0:
            result += "\n----------\n"
            result += why
        return result
    # No command matched.
    return None
def handle_websocket_data(data):
    """Handle one message pushed by metasmoke over the websocket.

    `data` is the decoded websocket payload; its 'message' entry is a dict
    keyed by event type. Exactly one event branch runs per call; unknown
    event types are ignored. Several branches terminate the process via
    os._exit so the launcher can restart/redeploy the bot.
    """
    if "message" not in data:
        return
    message = data['message']
    # Guard: plain strings (and other non-dict payloads) are Iterable too;
    # the `in` checks below then test substring membership. Presumably the
    # payload is always a dict here — TODO confirm against the sender.
    if isinstance(message, Iterable):
        if "message" in message:
            # Relay a plain chat message from metasmoke.
            chatcommunicate.tell_rooms_with("metasmoke", message['message'])
        elif "autoflag_fp" in message:
            # An autoflagged post turned out to be a false positive.
            event = message["autoflag_fp"]
            chatcommunicate.tell_rooms(event["message"], ("debug", "site-" + event["site"]),
                                       ("no-site-" + event["site"],), notify_site="/autoflag_fp")
        elif "exit" in message:
            os._exit(message["exit"])
        elif "blacklist" in message:
            ids = (message['blacklist']['uid'], message['blacklist']['site'])
            datahandling.add_blacklisted_user(ids, "metasmoke", message['blacklist']['post'])
            # Remember the feedback for 60 s (value is an expiry timestamp).
            datahandling.last_feedbacked = (ids, time.time() + 60)
        elif "unblacklist" in message:
            ids = (message['unblacklist']['uid'], message['unblacklist']['site'])
            datahandling.remove_blacklisted_user(ids)
        elif "naa" in message:
            # "Not an answer" feedback: just stop alerting about the post.
            post_site_id = parsing.fetch_post_id_and_site_from_url(message["naa"]["post_link"])
            datahandling.add_ignored_post(post_site_id[0:2])
        elif "fp" in message:
            post_site_id = parsing.fetch_post_id_and_site_from_url(message["fp"]["post_link"])
            datahandling.add_false_positive(post_site_id[0:2])
        elif "report" in message:
            # Manual report from metasmoke: fetch, scan, then report.
            post_data = apigetpost.api_get_post(message["report"]["post_link"])
            if post_data is None or post_data is False:
                return
            # Skip if already reported, unless it was previously fp'd.
            if datahandling.has_already_been_posted(post_data.site, post_data.post_id, post_data.title) \
                    and not datahandling.is_false_positive((post_data.post_id, post_data.site)):
                return
            user = parsing.get_user_from_url(post_data.owner_url)
            post = classes.Post(api_response=post_data.as_dict)
            scan_spam, scan_reasons, scan_why = spamhandling.check_if_spam(post)
            if scan_spam:
                why_append = u"This post would have also been caught for: " + \
                    u", ".join(scan_reasons).capitalize() + "\n" + scan_why
            else:
                why_append = u"This post would not have been caught otherwise."
            # Add user to blacklist *after* post is scanned
            if user is not None:
                datahandling.add_blacklisted_user(user, "metasmoke", post_data.post_url)
            why = u"Post manually reported by user *{}* from metasmoke.\n\n{}".format(
                message["report"]["user"], why_append)
            spamhandling.handle_spam(post=post,
                                     reasons=["Manually reported " + post_data.post_type],
                                     why=why)
        elif "deploy_updated" in message:
            # A new commit was deployed; autopull blacklist-only changes.
            sha = message["deploy_updated"]["head_commit"]["id"]
            # NOTE(review): os.popen('git log -1 --pretty="%H"').read()
            # includes a trailing newline, so this inequality looks always
            # true — confirm the intended comparison.
            if sha != os.popen('git log -1 --pretty="%H"').read():
                if "autopull" in message["deploy_updated"]["head_commit"]["message"]:
                    if only_blacklists_changed(GitManager.get_remote_diff()):
                        commit_md = "[`{0}`](https://github.com/Charcoal-SE/SmokeDetector/commit/{0})" \
                            .format(sha[:7])
                        i = []  # Currently no issues with backlists
                        for bl_file in glob('bad_*.txt') + glob('blacklisted_*.txt'):  # Check blacklists for issues
                            with open(bl_file, 'r') as lines:
                                seen = dict()
                                for lineno, line in enumerate(lines, 1):
                                    if line.endswith('\r\n'):
                                        i.append("DOS line ending at `{0}:{1}` in {2}".format(bl_file, lineno, commit_md))
                                    if not line.endswith('\n'):
                                        i.append("No newline at end of `{0}` in {1}".format(bl_file, commit_md))
                                    if line == '\n':
                                        i.append("Blank line at `{0}:{1}` in {2}".format(bl_file, lineno, commit_md))
                                    if line in seen:
                                        i.append("Duplicate entry of {0} at lines {1} and {2} of {3} in {4}"
                                                 .format(line.rstrip('\n'), seen[line], lineno, bl_file, commit_md))
                                    seen[line] = lineno
                        if i == []:  # No issues
                            GitManager.pull_remote()
                            load_blacklists()
                            chatcommunicate.tell_rooms_with("debug", "No code modified in {0}, only blacklists"
                                                                     " reloaded.".format(commit_md))
                        else:
                            i.append("please fix before pulling.")
                            chatcommunicate.tell_rooms_with("debug", ", ".join(i))
        elif "commit_status" in message:
            # CI finished for a commit; announce, and autopull on request.
            c = message["commit_status"]
            sha = c["commit_sha"][:7]
            if c["commit_sha"] != os.popen('git log -1 --pretty="%H"').read():
                if c["status"] == "success":
                    if "autopull" in c["commit_message"]:
                        s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/Charcoal-SE/SmokeDetector/" \
                            "commit/{commit_sha})" \
                            " succeeded. Message contains 'autopull', pulling...".format(ci_link=c["ci_url"],
                                                                                         commit_sha=sha)
                        chatcommunicate.tell_rooms_with("debug", s, notify_site="/ci")
                        time.sleep(2)
                        # Exit code 3 = launcher pulls and restarts.
                        os._exit(3)
                    else:
                        s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/Charcoal-SE/SmokeDetector/" \
                            "commit/{commit_sha}) succeeded.".format(ci_link=c["ci_url"], commit_sha=sha)
                        chatcommunicate.tell_rooms_with("debug", s, notify_site="/ci")
                elif c["status"] == "failure":
                    s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/Charcoal-SE/SmokeDetector/" \
                        "commit/{commit_sha}) failed.".format(ci_link=c["ci_url"], commit_sha=sha)
                    chatcommunicate.tell_rooms_with("debug", s, notify_site="/ci")
        elif "everything_is_broken" in message:
            if message["everything_is_broken"] is True:
                os._exit(6)
def handle_websocket_data(data):
    """Handle one metasmoke websocket event (older variant).

    Differs from the newer handler: relays chat messages to "debug"
    rooms, has no autoflag_fp/unblacklist/deploy_updated handling here
    other than the blacklist-only autopull path, and constructs the
    reported Post from an explicit api_response dict.
    """
    if "message" not in data:
        return
    message = data['message']
    # Presumably the payload is always a dict; a plain string is also
    # Iterable and would hit substring `in` checks — TODO confirm.
    if isinstance(message, Iterable):
        if "message" in message:
            chatcommunicate.tell_rooms_with("debug", message['message'])
        elif "exit" in message:
            os._exit(message["exit"])
        elif "blacklist" in message:
            datahandling.add_blacklisted_user((message['blacklist']['uid'], message['blacklist']['site']),
                                              "metasmoke", message['blacklist']['post'])
        elif "naa" in message:
            post_site_id = parsing.fetch_post_id_and_site_from_url(message["naa"]["post_link"])
            datahandling.add_ignored_post(post_site_id[0:2])
        elif "fp" in message:
            post_site_id = parsing.fetch_post_id_and_site_from_url(message["fp"]["post_link"])
            datahandling.add_false_positive(post_site_id[0:2])
        elif "report" in message:
            # Manual report: fetch the post via the API and re-report it.
            post_data = apigetpost.api_get_post(message["report"]["post_link"])
            if post_data is None or post_data is False:
                return
            if datahandling.has_already_been_posted(post_data.site, post_data.post_id, post_data.title) \
                    and not datahandling.is_false_positive((post_data.post_id, post_data.site)):
                return
            user = parsing.get_user_from_url(post_data.owner_url)
            if user is not None:
                datahandling.add_blacklisted_user(user, "metasmoke", post_data.post_url)
            why = u"Post manually reported by user *{}* from metasmoke.\n".format(message["report"]["user"])
            # Rebuild an API-shaped dict so Post() can parse it uniformly.
            postobj = classes.Post(api_response={'title': post_data.title, 'body': post_data.body,
                                                 'owner': {'display_name': post_data.owner_name,
                                                           'reputation': post_data.owner_rep,
                                                           'link': post_data.owner_url},
                                                 'site': post_data.site,
                                                 'is_answer': (post_data.post_type == "answer"),
                                                 'score': post_data.score, 'link': post_data.post_url,
                                                 'question_id': post_data.post_id,
                                                 'up_vote_count': post_data.up_vote_count,
                                                 'down_vote_count': post_data.down_vote_count})
            spamhandling.handle_spam(post=postobj,
                                     reasons=["Manually reported " + post_data.post_type],
                                     why=why)
        elif "deploy_updated" in message:
            # Autopull a deploy when only blacklist files changed.
            sha = message["deploy_updated"]["head_commit"]["id"]
            if sha != os.popen('git log --pretty=format:"%H" -n 1').read():
                if "autopull" in message["deploy_updated"]["head_commit"]["message"]:
                    if only_blacklists_changed(GitManager.get_remote_diff()):
                        commit_md = "[`{0}`](https://github.com/Charcoal-SE/SmokeDetector/commit/{0})" \
                            .format(sha[:7])
                        i = []  # Currently no issues with backlists
                        for bl_file in glob('bad_*.txt') + glob('blacklisted_*.txt'):  # Check blacklists for issues
                            with open(bl_file, 'r') as lines:
                                seen = dict()
                                for lineno, line in enumerate(lines, 1):
                                    if line.endswith('\r\n'):
                                        i.append("DOS line ending at `{0}:{1}` in {2}".format(bl_file, lineno, commit_md))
                                    if not line.endswith('\n'):
                                        i.append("No newline at end of `{0}` in {1}".format(bl_file, commit_md))
                                    if line == '\n':
                                        i.append("Blank line at `{0}:{1}` in {2}".format(bl_file, lineno, commit_md))
                                    if line in seen:
                                        i.append("Duplicate entry of {0} at lines {1} and {2} of {3} in {4}"
                                                 .format(line.rstrip('\n'), seen[line], lineno, bl_file, commit_md))
                                    seen[line] = lineno
                        if i == []:  # No issues
                            GitManager.pull_remote()
                            load_blacklists()
                            chatcommunicate.tell_rooms_with("debug", "No code modified in {0}, only blacklists"
                                                                     " reloaded.".format(commit_md))
                        else:
                            i.append("please fix before pulling.")
                            chatcommunicate.tell_rooms_with("debug", ", ".join(i))
        elif "commit_status" in message:
            # Announce CI result; autopull when the commit message asks.
            c = message["commit_status"]
            sha = c["commit_sha"][:7]
            if c["commit_sha"] != os.popen('git log --pretty=format:"%H" -n 1').read():
                if c["status"] == "success":
                    if "autopull" in c["commit_message"]:
                        s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/Charcoal-SE/SmokeDetector/" \
                            "commit/{commit_sha})" \
                            " succeeded. Message contains 'autopull', pulling...".format(ci_link=c["ci_url"],
                                                                                         commit_sha=sha)
                        chatcommunicate.tell_rooms_with("debug", s)
                        time.sleep(2)
                        os._exit(3)
                    else:
                        s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/Charcoal-SE/SmokeDetector/" \
                            "commit/{commit_sha}) succeeded.".format(ci_link=c["ci_url"], commit_sha=sha)
                        chatcommunicate.tell_rooms_with("debug", s)
                elif c["status"] == "failure":
                    s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/Charcoal-SE/SmokeDetector/" \
                        "commit/{commit_sha}) failed.".format(ci_link=c["ci_url"], commit_sha=sha)
                    chatcommunicate.tell_rooms_with("debug", s)
        elif "everything_is_broken" in message:
            if message["everything_is_broken"] is True:
                os._exit(6)
def handle_commands(content_lower, message_parts, ev_room, ev_user_id, ev_user_name, wrap2, content):
    """Dispatch one chat message to the matching bot command (older
    variant without a `message_id` parameter / transcript URL).

    Returns a reply string to post back to the room, or None when nothing
    matched (or the command replies only through side effects).
    """
    second_part_lower = "" if len(message_parts) < 2 else message_parts[1].lower()

    # ---- Reply-style feedback: first token is ":<message-id>" ----------
    # NOTE(review): the pattern is unanchored (no ^/$), so any token
    # *containing* ":digits" matches — confirm whether that is intended.
    if re.compile(":[0-9]+").search(message_parts[0]):
        msg_id = int(message_parts[0][1:])
        msg = wrap2.get_message(msg_id)
        msg_content = msg.content_source
        # Trailing "-" in the feedback word suppresses the confirmation.
        # NOTE(review): unguarded message_parts[1] — a bare ":<id>" reply
        # would raise IndexError; confirm callers always pass a second token.
        quiet_action = ("-" in message_parts[1].lower())
        # Only act on replies to this bot's own report messages.
        if str(msg.owner.id) != GlobalVars.smokeDetector_user_id[ev_room] or msg_content is None:
            return
        post_url = fetch_post_url_from_msg_content(msg_content)
        post_site_id = fetch_post_id_and_site_from_msg_content(msg_content)
        if post_site_id is not None:
            post_type = post_site_id[2]
        else:
            post_type = None

        # ---- false / fp (/ falseu / fpu): mark as false positive -------
        if (second_part_lower.startswith("false") or second_part_lower.startswith("fp")) \
                and is_privileged(ev_room, ev_user_id, wrap2):
            if post_site_id is None:
                return "That message is not a report."
            # Feedback goes to metasmoke on a separate thread.
            t_metasmoke = Thread(target=Metasmoke.send_feedback_for_post,
                                 args=(post_url, second_part_lower, ev_user_name, ))
            t_metasmoke.start()
            add_false_positive((post_site_id[0], post_site_id[1]))
            user_added = False
            if message_parts[1].lower().startswith("falseu") or message_parts[1].lower().startswith("fpu"):
                url_from_msg = fetch_owner_url_from_msg_content(msg_content)
                if url_from_msg is not None:
                    user = get_user_from_url(url_from_msg)
                    if user is not None:
                        add_whitelisted_user(user)
                        user_added = True
            learned = False
            if post_type == "question":
                learned = bayesian_learn_title(fetch_title_from_msg_content(msg_content), "good")
                if learned and user_added and not quiet_action:
                    return "Registered question as false positive, whitelisted user and added title to Bayesian doctype 'good'."
                elif learned and not quiet_action:
                    return "Registered question as false positive and added title to Bayesian doctype 'good'."
                elif not learned:
                    return "Registered question as false positive, but could not add title to Bayesian doctype 'good'."
            elif post_type == "answer":
                if user_added and not quiet_action:
                    return "Registered answer as false positive and whitelisted user."
                elif not quiet_action:
                    return "Registered answer as false positive."
            # Quiet path: best-effort delete of the report message.
            try:
                msg.delete()
            except:
                pass

        # ---- true / tp (/ trueu / tpu): confirm as spam ----------------
        if (second_part_lower.startswith("true") or second_part_lower.startswith("tp")) \
                and is_privileged(ev_room, ev_user_id, wrap2):
            if post_site_id is None:
                return "That message is not a report."
            t_metasmoke = Thread(target=Metasmoke.send_feedback_for_post,
                                 args=(post_url, second_part_lower, ev_user_name, ))
            t_metasmoke.start()
            learned = False
            user_added = False
            if message_parts[1].lower().startswith("trueu") or message_parts[1].lower().startswith("tpu"):
                url_from_msg = fetch_owner_url_from_msg_content(msg_content)
                if url_from_msg is not None:
                    user = get_user_from_url(url_from_msg)
                    if user is not None:
                        add_blacklisted_user(user)
                        user_added = True
            if post_type == "question":
                learned = bayesian_learn_title(fetch_title_from_msg_content(msg_content), "bad")
                if learned and user_added and not quiet_action:
                    return "Blacklisted user and registered question as true positive: added title to the Bayesian doctype 'bad'."
                elif learned and not quiet_action:
                    return "Registered question as true positive: added title to the Bayesian doctype 'bad'."
                elif not learned:
                    return "Something went wrong when registering question as true positive."
            elif post_type == "answer":
                if user_added and not quiet_action:
                    return "Blacklisted user."
                elif not user_added:
                    return "`true`/`tp` cannot be used for answers because their job is to add the title of the *question* to the Bayesian doctype 'bad'. If you want to blacklist the poster of the answer, use `trueu` or `tpu`."

        # ---- ignore: stop re-reporting this post -----------------------
        if second_part_lower.startswith("ignore") and is_privileged(ev_room, ev_user_id, wrap2):
            if post_site_id is None:
                return "That message is not a report."
            add_ignored_post(post_site_id[0:2])
            if not quiet_action:
                return "Post ignored; alerts about it will no longer be posted."

        # ---- delete/remove/gone/del: delete the report message ---------
        if (second_part_lower.startswith("delete") or second_part_lower.startswith("remove")
                or second_part_lower.startswith("gone")
                or second_part_lower == "del") and is_privileged(ev_room, ev_user_id, wrap2):
            try:
                msg.delete()
            except:
                pass  # couldn't delete message

        # ---- why: show the detection reasons for the post --------------
        if second_part_lower.startswith("why"):
            t = fetch_post_id_and_site_from_msg_content(msg_content)
            if t is None:
                return "That's not a report."
            post_id, site, _ = t
            why = get_why(site, post_id)
            if why is None or why == "":
                return "There is no `why` data for that post (anymore)."
            else:
                return why

    # ---- !!/ prefixed commands -----------------------------------------
    # get_user_from_list_command: uid -2 = parse error (message in val),
    # uid -1 = bad format.
    if content_lower.startswith("!!/addblu") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":
            add_blacklisted_user((uid, val))
            return "User blacklisted (`{}` on `{}`).".format(uid, val)
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return "Invalid format. Valid format: `!!/addblu profileurl` *or* `!!/addblu userid sitename`."
    if content_lower.startswith("!!/rmblu") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":
            if remove_blacklisted_user((uid, val)):
                return "User removed from blacklist (`{}` on `{}`).".format(uid, val)
            else:
                return "User is not blacklisted."
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return "Invalid format. Valid format: `!!/rmblu profileurl` *or* `!!/rmblu userid sitename`."
    if content_lower.startswith("!!/isblu"):
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":
            if is_blacklisted_user((uid, val)):
                return "User is blacklisted. (`{}` on `{}`).".format(uid, val)
            else:
                return "User is not blacklisted. (`{}` on `{}`).".format(uid, val)
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return "Invalid format. Valid format: `!!/isblu profileurl` *or* `!!/isblu userid sitename`."
    if content_lower.startswith("!!/addwlu") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":
            add_whitelisted_user((uid, val))
            return "User whitelisted (`{}` on `{}`).".format(uid, val)
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return "Invalid format. Valid format: `!!/addwlu profileurl` *or* `!!/addwlu userid sitename`."
    if content_lower.startswith("!!/rmwlu") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        uid, val = get_user_from_list_command(content_lower)
        # NOTE(review): `uid != -1` (unlike `uid > -1` elsewhere) makes the
        # `uid == -2` branch below unreachable — confirm intent.
        if uid != -1 and val != "":
            if remove_whitelisted_user((uid, val)):
                return "User removed from whitelist (`{}` on `{}`).".format(uid, val)
            else:
                return "User is not whitelisted."
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return "Invalid format. Valid format: `!!/rmwlu profileurl` *or* `!!/rmwlu userid sitename`."
    if content_lower.startswith("!!/iswlu"):
        uid, val = get_user_from_list_command(content_lower)
        if uid > -1 and val != "":
            if is_whitelisted_user((uid, val)):
                return "User is whitelisted. (`{}` on `{}`).".format(uid, val)
            else:
                return "User is not whitelisted. (`{}` on `{}`).".format(uid, val)
        elif uid == -2:
            return "Error: {}".format(val)
        else:
            return "Invalid format. Valid format: `!!/iswlu profileurl` *or* `!!/iswlu userid sitename`."

    # ---- !!/report: manually report a post by URL ----------------------
    if content_lower.startswith("!!/report") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        if len(message_parts) < 2:
            return "Not enough arguments."
        url = message_parts[1]
        # api_get_post: None = unrecognizable URL, False = not in the API.
        post_data = api_get_post(url)
        if post_data is None:
            return "That does not look like a valid post URL."
        if post_data is False:
            return "Could not find data for this post in the API. Check whether the post is not deleted yet."
        user = get_user_from_url(post_data.owner_url)
        if user is not None:
            add_blacklisted_user(user)
        bayesian_learn_title(post_data.title, "bad")
        handle_spam(post_data.title, post_data.body, post_data.owner_name, post_data.site,
                    post_data.post_url, post_data.owner_url, post_data.post_id,
                    ["Manually reported " + post_data.post_type], post_data.post_type == "answer")

    # ---- misc / fun commands ------------------------------------------
    if content_lower.startswith("!!/wut"):
        return "Whaddya mean, 'wut'? Humans..."
    if content_lower.startswith("!!/lick"):
        return "*licks ice cream cone*"
    if content_lower.startswith("!!/alive"):
        if ev_room == GlobalVars.charcoal_room_id:
            return 'Of course'
        elif ev_room == GlobalVars.meta_tavern_room_id or ev_room == GlobalVars.socvr_room_id:
            return random.choice(['Yup', 'You doubt me?', 'Of course', '... did I miss something?',
                                  'plz send teh coffee',
                                  'Watching this endless list of new questions *never* gets boring',
                                  'Kinda sorta'])
    if content_lower.startswith("!!/rev"):
        # Markdown link to the currently running commit.
        return '[' + \
            GlobalVars.commit_with_author + \
            '](https://github.com/Charcoal-SE/SmokeDetector/commit/' + \
            GlobalVars.commit + \
            ')'
    if content_lower.startswith("!!/status"):
        now = datetime.utcnow()
        diff = now - UtcDate.startup_utc_date
        minutes, remainder = divmod(diff.seconds, 60)
        minutestr = "minutes" if minutes != 1 else "minute"
        return 'Running since {} UTC ({} {})'.format(GlobalVars.startup_utc, minutes, minutestr)

    # ---- process control (exit codes signal the launcher) --------------
    if content_lower.startswith("!!/reboot"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            post_message_in_room(ev_room, "Goodbye, cruel world")
            os._exit(5)
    if content_lower.startswith("!!/stappit"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            post_message_in_room(ev_room, "Goodbye, cruel world")
            os._exit(6)
    if content_lower.startswith("!!/master"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            os._exit(8)
    if content_lower.startswith("!!/clearbl"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            if os.path.isfile("blacklistedUsers.txt"):
                os.remove("blacklistedUsers.txt")
                GlobalVars.blacklisted_users = []
                return "Kaboom, blacklisted users cleared."
            else:
                return "There are no blacklisted users at the moment."
    if content_lower.startswith("!!/block"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            timeToBlock = content_lower[9:].strip()
            # NOTE(review): int() raises ValueError on a non-numeric
            # argument — confirm that is acceptable.
            timeToBlock = int(timeToBlock) if timeToBlock else 0
            if 0 < timeToBlock < 14400:
                GlobalVars.blockedTime = time.time() + timeToBlock
            else:
                GlobalVars.blockedTime = time.time() + 900
            return "blocked"
    if content_lower.startswith("!!/unblock"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            GlobalVars.blockedTime = time.time()
            return "unblocked"
    if content_lower.startswith("!!/errorlogs"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            count = -1
            if len(message_parts) != 2:
                return "The !!/errorlogs command requires 1 argument."
            try:
                count = int(message_parts[1])
            except ValueError:
                pass
            if count == -1:
                return "Invalid argument."
            logs_part = fetch_lines_from_error_log(count)
            post_message_in_room(ev_room, logs_part, False)
    if content_lower.startswith("!!/pull"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            # Only pull when the latest master commit has a CI result.
            r = requests.get('https://api.github.com/repos/Charcoal-SE/SmokeDetector/git/refs/heads/master')
            latest_sha = r.json()["object"]["sha"]
            r = requests.get('https://api.github.com/repos/Charcoal-SE/SmokeDetector/commits/'
                             + latest_sha + '/statuses')
            states = []
            for status in r.json():
                state = status["state"]
                states.append(state)
            if "success" in states:
                # Exit code 3 = launcher should pull and restart.
                os._exit(3)
            elif "error" in states or "failure" in states:
                return "CI build failed! :( Please check your commit."
            elif "pending" in states or not states:
                return "CI build is still pending, wait until the build has finished and then pull again."
    if content_lower.startswith("!!/help"):
        return "I'm [SmokeDetector](https://github.com/Charcoal-SE/SmokeDetector), a bot that detects spam and low-quality posts on the network and posts alerts to chat. [A command list is available here](https://github.com/Charcoal-SE/SmokeDetector/wiki/Commands)."
    if content_lower.startswith("!!/apiquota"):
        return GlobalVars.apiquota
    if content_lower.startswith("!!/whoami"):
        if (ev_room in GlobalVars.smokeDetector_user_id):
            return "My id for this room is {}".format(GlobalVars.smokeDetector_user_id[ev_room])
        else:
            return "I don't know my user ID for this room. (Something is wrong, and it's apnorton's fault.)"
    if content_lower.startswith("!!/location"):
        return GlobalVars.location
    if content_lower.startswith("!!/queuestatus"):
        post_message_in_room(ev_room, GlobalVars.bodyfetcher.print_queue(), False)
    if content_lower.startswith("!!/blame") and ev_room == GlobalVars.meta_tavern_room_id:
        GlobalVars.tavern_users_chatting = list(set(GlobalVars.tavern_users_chatting))  # Make unique
        user_to_blame = random.choice(GlobalVars.tavern_users_chatting)
        return "It's " + user_to_blame + "'s fault."
    if "smokedetector" in content_lower and "fault" in content_lower \
            and ("xkcdbot" in ev_user_name.lower() or "bjb568" in ev_user_name.lower()):
        return "Liar"
    if content_lower.startswith("!!/coffee"):
        return "*brews coffee for @" + ev_user_name.replace(" ", "") + "*"
    if content_lower.startswith("!!/tea"):
        return "*brews a cup of " + random.choice(['earl grey', 'green', 'chamomile', 'lemon',
                                                   'darjeeling', 'mint']) + " tea for @" + ev_user_name.replace(" ", "") + "*"
    # ---- !!/test: run the spam rules against arbitrary text ------------
    if content_lower.startswith("!!/test"):
        string_to_test = content[8:]
        if len(string_to_test) == 0:
            return "Nothing to test"
        result = "> "
        # The same string is checked as title, body and username.
        reasons, why = FindSpam.test_post(string_to_test, string_to_test, string_to_test, "", False, False)
        if len(reasons) == 0:
            result += "Would not be caught for title, body and username."
            return result
        result += ", ".join(reasons).capitalize()
        if why is not None and len(why) > 0:
            result += "\n----------\n"
            result += why
        return result
    # No command matched.
    return None
def handle_websocket_data(data):
    """
    Dispatch one metasmoke websocket payload.

    ``data`` is the decoded JSON frame; its ``message`` value may carry one of
    several mutually-exclusive commands (chat relay, blacklist, naa/fp
    feedback, manual report, CI commit status).  Returns None; all effects are
    side effects on GlobalVars / datahandling.
    """
    if "message" not in data:
        return
    message = data['message']
    if isinstance(message, Iterable):
        if "message" in message:
            # Plain chat relay: forward text verbatim to Charcoal HQ.
            GlobalVars.charcoal_hq.send_message(message['message'])
        elif "blacklist" in message:
            datahandling.add_blacklisted_user((message['blacklist']['uid'], message['blacklist']['site']),
                                              "metasmoke", message['blacklist']['post'])
        elif "naa" in message:
            post_site_id = parsing.fetch_post_id_and_site_from_url(message["naa"]["post_link"])
            # Only (post_id, site) are needed; drop the trailing post-type element.
            datahandling.add_ignored_post(post_site_id[0:2])
        elif "fp" in message:
            post_site_id = parsing.fetch_post_id_and_site_from_url(message["fp"]["post_link"])
            datahandling.add_false_positive(post_site_id[0:2])
        elif "report" in message:
            post_data = apigetpost.api_get_post(message["report"]["post_link"])
            if post_data is None or post_data is False:
                return
            # Skip re-reports unless the earlier report was marked false-positive
            # (a re-report may be correcting that).
            if datahandling.has_already_been_posted(post_data.site, post_data.post_id, post_data.title) \
                    and not datahandling.is_false_positive((post_data.post_id, post_data.site)):
                return
            user = parsing.get_user_from_url(post_data.owner_url)
            if user is not None:
                datahandling.add_blacklisted_user(user, "metasmoke", post_data.post_url)
            why = u"Post manually reported by user *{}* from metasmoke.\n".format(message["report"]["user"])
            spamhandling.handle_spam(title=post_data.title, body=post_data.body, poster=post_data.owner_name,
                                     site=post_data.site, post_url=post_data.post_url,
                                     poster_url=post_data.owner_url, post_id=post_data.post_id,
                                     reasons=["Manually reported " + post_data.post_type],
                                     is_answer=post_data.post_type == "answer", why=why,
                                     owner_rep=post_data.owner_rep, post_score=post_data.score,
                                     up_vote_count=post_data.up_vote_count,
                                     down_vote_count=post_data.down_vote_count,
                                     question_id=post_data.question_id)
        elif "commit_status" in message:
            c = message["commit_status"]
            sha = c["commit_sha"][:7]
            # Only act on commits other than the one currently checked out.
            if c["commit_sha"] != os.popen('git log --pretty=format:"%H" -n 1').read():
                if c["status"] == "success":
                    if "autopull" in c["commit_message"]:
                        s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/Charcoal-SE/SmokeDetector/" \
                            "commit/{commit_sha})"\
                            " succeeded. Message contains 'autopull', pulling...".format(ci_link=c["ci_url"],
                                                                                        commit_sha=sha)
                        GlobalVars.charcoal_hq.send_message(s)
                        time.sleep(2)
                        # Exit code 3 signals the supervisor to pull and restart.
                        os._exit(3)
                    else:
                        s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/Charcoal-SE/SmokeDetector/" \
                            "commit/{commit_sha}) succeeded.".format(ci_link=c["ci_url"], commit_sha=sha)
                elif c["status"] == "failure":
                    s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/Charcoal-SE/SmokeDetector/" \
                        "commit/{commit_sha}) failed.".format(ci_link=c["ci_url"], commit_sha=sha)
                # s is unset when status is neither success nor failure; the
                # inspection suppression below acknowledges that.
                # noinspection PyUnboundLocalVariable
                GlobalVars.charcoal_hq.send_message(s)
def handle_websocket_data(data):
    """
    Dispatch one metasmoke websocket payload (variant that builds a
    ``classes.Post`` for manual reports and supports a remote "exit" command).

    Returns None; all effects are side effects on GlobalVars / datahandling,
    or process exit.
    """
    if "message" not in data:
        return
    message = data['message']
    if isinstance(message, Iterable):
        if "message" in message:
            # Plain chat relay: forward text verbatim to Charcoal HQ.
            GlobalVars.charcoal_hq.send_message(message['message'])
        elif "exit" in message:
            # Remote kill switch: exit with the requested status code.
            os._exit(message["exit"])
        elif "blacklist" in message:
            datahandling.add_blacklisted_user((message['blacklist']['uid'], message['blacklist']['site']),
                                              "metasmoke", message['blacklist']['post'])
        elif "naa" in message:
            post_site_id = parsing.fetch_post_id_and_site_from_url(message["naa"]["post_link"])
            datahandling.add_ignored_post(post_site_id[0:2])
        elif "fp" in message:
            post_site_id = parsing.fetch_post_id_and_site_from_url(message["fp"]["post_link"])
            datahandling.add_false_positive(post_site_id[0:2])
        elif "report" in message:
            post_data = apigetpost.api_get_post(message["report"]["post_link"])
            if post_data is None or post_data is False:
                return
            # Skip re-reports unless the earlier report was marked false-positive.
            if datahandling.has_already_been_posted(post_data.site, post_data.post_id, post_data.title) \
                    and not datahandling.is_false_positive((post_data.post_id, post_data.site)):
                return
            user = parsing.get_user_from_url(post_data.owner_url)
            if user is not None:
                datahandling.add_blacklisted_user(user, "metasmoke", post_data.post_url)
            why = u"Post manually reported by user *{}* from metasmoke.\n".format(message["report"]["user"])
            # Rebuild an API-shaped dict so classes.Post can consume it.
            # NOTE(review): 'question_id' is fed post_data.post_id — for answers
            # this is the answer id, not the parent question; confirm intended.
            postobj = classes.Post(api_response={'title': post_data.title, 'body': post_data.body,
                                                 'owner': {'display_name': post_data.owner_name,
                                                           'reputation': post_data.owner_rep,
                                                           'link': post_data.owner_url},
                                                 'site': post_data.site,
                                                 'IsAnswer': (post_data.post_type == "answer"),
                                                 'score': post_data.score, 'link': post_data.post_url,
                                                 'question_id': post_data.post_id,
                                                 'up_vote_count': post_data.up_vote_count,
                                                 'down_vote_count': post_data.down_vote_count})
            spamhandling.handle_spam(post=postobj,
                                     reasons=["Manually reported " + post_data.post_type],
                                     why=why)
        elif "commit_status" in message:
            c = message["commit_status"]
            sha = c["commit_sha"][:7]
            # Only act on commits other than the one currently checked out.
            if c["commit_sha"] != os.popen('git log --pretty=format:"%H" -n 1').read():
                if c["status"] == "success":
                    if "autopull" in c["commit_message"]:
                        s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/Charcoal-SE/SmokeDetector/" \
                            "commit/{commit_sha})"\
                            " succeeded. Message contains 'autopull', pulling...".format(ci_link=c["ci_url"],
                                                                                        commit_sha=sha)
                        GlobalVars.charcoal_hq.send_message(s)
                        time.sleep(2)
                        # Exit code 3 signals the supervisor to pull and restart.
                        os._exit(3)
                    else:
                        s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/Charcoal-SE/SmokeDetector/" \
                            "commit/{commit_sha}) succeeded.".format(ci_link=c["ci_url"], commit_sha=sha)
                elif c["status"] == "failure":
                    s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/Charcoal-SE/SmokeDetector/" \
                        "commit/{commit_sha}) failed.".format(ci_link=c["ci_url"], commit_sha=sha)
                # noinspection PyUnboundLocalVariable
                GlobalVars.charcoal_hq.send_message(s)
def allspam(msg, url):
    """
    Reports all of a user's posts as spam.

    :param msg: the chat message object that triggered the command
    :param url: A user profile URL (site profile or network profile)
    :return: None
    :raises CmdException: on rate limiting, invalid/unknown user, or
        abnormal account/reputation/post counts.
    """
    crn, wait = can_report_now(msg.owner.id, msg._client.host)
    if not crn:
        raise CmdException("You can execute the !!/allspam command again in {} seconds. "
                           "To avoid one user sending lots of reports in a few commands and "
                           "slowing SmokeDetector down due to rate-limiting, you have to "
                           "wait 30 seconds after you've reported multiple posts in "
                           "one go.".format(wait))
    user = get_user_from_url(url)
    if user is None:
        raise CmdException("That doesn't look like a valid user URL.")
    user_sites = []
    user_posts = []
    # Detect whether link is to network profile or site profile
    if user[1] == 'stackexchange.com':
        # Respect backoffs etc
        GlobalVars.api_request_lock.acquire()
        if GlobalVars.api_backoff_time > time.time():
            time.sleep(GlobalVars.api_backoff_time - time.time() + 2)
        # Fetch the user's associated per-site accounts
        api_filter = "!6Pbp)--cWmv(1"
        request_url = "http://api.stackexchange.com/2.2/users/{}/associated?filter={}&key=IAkbitmze4B8KpacUfLqkw((" \
            .format(user[0], api_filter)
        res = requests.get(request_url).json()
        if "backoff" in res:
            if GlobalVars.api_backoff_time < time.time() + res["backoff"]:
                GlobalVars.api_backoff_time = time.time() + res["backoff"]
        GlobalVars.api_request_lock.release()
        if 'items' not in res or len(res['items']) == 0:
            raise CmdException("The specified user does not appear to exist.")
        if res['has_more']:
            raise CmdException("The specified user has an abnormally high number of accounts. Please consider flagging "
                               "for moderator attention, otherwise use !!/report on the user's posts individually.")
        # Add accounts with posts
        for site in res['items']:
            if site['question_count'] > 0 or site['answer_count'] > 0:
                user_sites.append((site['user_id'], get_api_sitename_from_url(site['site_url'])))
    else:
        user_sites.append((user[0], get_api_sitename_from_url(user[1])))
    # Fetch posts
    for u_id, u_site in user_sites:
        # Respect backoffs etc
        GlobalVars.api_request_lock.acquire()
        if GlobalVars.api_backoff_time > time.time():
            time.sleep(GlobalVars.api_backoff_time - time.time() + 2)
        # Fetch this account's posts
        api_filter = "!)Q4RrMH0DC96Y4g9yVzuwUrW"
        request_url = "http://api.stackexchange.com/2.2/users/{}/posts?site={}&filter={}&key=IAkbitmze4B8KpacUfLqkw((" \
            .format(u_id, u_site, api_filter)
        res = requests.get(request_url).json()
        if "backoff" in res:
            if GlobalVars.api_backoff_time < time.time() + res["backoff"]:
                GlobalVars.api_backoff_time = time.time() + res["backoff"]
        GlobalVars.api_request_lock.release()
        if 'items' not in res or len(res['items']) == 0:
            raise CmdException("The specified user has no posts on this site.")
        posts = res['items']
        if posts[0]['owner']['reputation'] > 100:
            raise CmdException("The specified user's reputation is abnormally high. Please consider flagging for "
                               "moderator attention, otherwise use !!/report on the posts individually.")
        # Add blacklisted user - use most downvoted post as post URL
        message_url = "https://chat.{}/transcript/{}?m={}".format(msg._client.host, msg.room.id, msg.id)
        add_blacklisted_user(user, message_url, sorted(posts, key=lambda x: x['score'])[0]['owner']['link'])
        # TODO: Postdata refactor, figure out a better way to use apigetpost
        for post in posts:
            post_data = PostData()
            post_data.post_id = post['post_id']
            post_data.post_url = url_to_shortlink(post['link'])
            *discard, post_data.site, post_data.post_type = fetch_post_id_and_site_from_url(
                url_to_shortlink(post['link']))
            post_data.title = unescape(post['title'])
            post_data.owner_name = unescape(post['owner']['display_name'])
            post_data.owner_url = post['owner']['link']
            post_data.owner_rep = post['owner']['reputation']
            post_data.body = post['body']
            post_data.score = post['score']
            post_data.up_vote_count = post['up_vote_count']
            post_data.down_vote_count = post['down_vote_count']
            if post_data.post_type == "answer":
                # Annoyingly we have to make another request to get the question ID, since it is only returned by the
                # /answers route
                # Respect backoffs etc
                GlobalVars.api_request_lock.acquire()
                if GlobalVars.api_backoff_time > time.time():
                    time.sleep(GlobalVars.api_backoff_time - time.time() + 2)
                # Fetch the answer to learn its parent question id.
                # Renamed from `filter` to avoid shadowing the builtin.
                answer_filter = "!*Jxb9s5EOrE51WK*"
                req_url = "http://api.stackexchange.com/2.2/answers/{}?site={}&filter={}&key=IAkbitmze4B8KpacUfLqkw((" \
                    .format(post['post_id'], u_site, answer_filter)
                answer_res = requests.get(req_url).json()
                # BUGFIX: check the backoff of *this* response (answer_res),
                # not the stale `res` from the earlier /users/{id}/posts call.
                if "backoff" in answer_res:
                    if GlobalVars.api_backoff_time < time.time() + answer_res["backoff"]:
                        GlobalVars.api_backoff_time = time.time() + answer_res["backoff"]
                GlobalVars.api_request_lock.release()
                # Finally, set the attribute
                post_data.question_id = answer_res['items'][0]['question_id']
                post_data.is_answer = True
            user_posts.append(post_data)
    if len(user_posts) == 0:
        raise CmdException("The specified user hasn't posted anything.")
    if len(user_posts) > 15:
        raise CmdException("The specified user has an abnormally high number of spam posts. Please consider flagging "
                           "for moderator attention, otherwise use !!/report on the posts individually.")
    why_info = u"User manually reported by *{}* in room *{}*.\n".format(msg.owner.name, msg.room.name)
    # Handle all posts
    for index, post in enumerate(user_posts, start=1):
        batch = ""
        if len(user_posts) > 1:
            batch = " (batch report: post {} out of {})".format(index, len(user_posts))
        handle_spam(post=Post(api_response=post.as_dict),
                    reasons=["Manually reported " + post.post_type + batch],
                    why=why_info)
        time.sleep(2)  # Should this be implemented differently?
    if len(user_posts) > 2:
        add_or_update_multiple_reporter(msg.owner.id, msg._client.host, time.time())
def make_api_call_for_site(self, site):
    """
    Pop the queued question IDs for ``site``, fetch them from the Stack
    Exchange API, then spam-check and gibberish-score each question and its
    answers.  For stackoverflow.com the realtime feed is incomplete, so
    recently-active questions are fetched instead of specific IDs.

    Returns None; results are reported via handle_spam and chat messages.
    """
    posts = self.queue.pop(site)
    if site == "stackoverflow.com":
        # Not all SO questions are shown in the realtime feed. We now
        # fetch all recently modified SO questions to work around that.
        min_query = ""
        if self.last_activity_date != 0:
            min_query = "&min=" + str(self.last_activity_date)
            pagesize = "50"
        else:
            pagesize = "25"
        url = "http://api.stackexchange.com/2.2/questions?site=stackoverflow&filter=!4y_-sca-)pfAwlmP_1FxC6e5yzutRIcQvonAiP&key=IAkbitmze4B8KpacUfLqkw((&pagesize=" + pagesize + min_query
    else:
        url = "http://api.stackexchange.com/2.2/questions/" + ";".join(
            str(x) for x in posts
        ) + "?site=" + site + "&filter=!4y_-sca-)pfAwlmP_1FxC6e5yzutRIcQvonAiP&key=IAkbitmze4B8KpacUfLqkw(("
    # wait to make sure API has/updates post data
    time.sleep(60)
    try:
        response = requests.get(url, timeout=20).json()
    except requests.exceptions.Timeout:
        return  # could add some retrying logic here, but eh.
    if "quota_remaining" in response:
        GlobalVars.apiquota = response["quota_remaining"]
    else:
        # No quota field: treat as an error response and stop processing.
        GlobalVars.apiquota = 0
        return
    if site == "stackoverflow.com":
        # Remember the newest activity timestamp for the next incremental fetch.
        if len(response["items"]) > 0 and "last_activity_date" in response["items"][0]:
            self.last_activity_date = response["items"][0]["last_activity_date"]
    for post in response["items"]:
        if "title" not in post or "body" not in post:
            continue
        title = GlobalVars.parser.unescape(post["title"])
        body = GlobalVars.parser.unescape(post["body"])
        link = post["link"]
        try:
            owner_name = GlobalVars.parser.unescape(post["owner"]["display_name"])
            owner_link = post["owner"]["link"]
            owner_rep = post["owner"]["reputation"]
        except:
            # Deleted/anonymous owners may lack these fields; fall back to blanks.
            owner_name = ""
            owner_link = ""
            owner_rep = 0
        q_id = str(post["question_id"])
        if owner_rep <= 50:
            is_spam, reason, why = check_if_spam(title, body, owner_name, owner_link, site, q_id, False, False)
            if is_spam:
                try:
                    handle_spam(title, body, owner_name, site, link, owner_link, q_id, reason, False, why)
                except:
                    # Best-effort reporting: never let one failed report stop the scan.
                    print "NOP"
        classified, gibberish_score = classify_gibberish(body, site)
        if classified and gibberish_score >= 65:
            GlobalVars.bayesian_testroom.send_message(
                "[ SmokeDetector | GibberishClassifierBeta ] "
                u"Potential gibberish body ({}%): [{}]({}) on `{}`".format(
                    gibberish_score, title, link, site))
        try:
            for answer in post["answers"]:
                answer_title = ""
                body = answer["body"]
                print "got answer from owner with name " + owner_name
                link = answer["link"]
                a_id = str(answer["answer_id"])
                try:
                    owner_name = GlobalVars.parser.unescape(answer["owner"]["display_name"])
                    owner_link = answer["owner"]["link"]
                    owner_rep = answer["owner"]["reputation"]
                except:
                    owner_name = ""
                    owner_link = ""
                    owner_rep = 0
                if owner_rep <= 50:
                    is_spam, reason, why = check_if_spam(
                        answer_title, body, owner_name, owner_link, site, a_id, True, False)
                    if is_spam:
                        try:
                            handle_spam(title, body, owner_name, site, link, owner_link, a_id, reason, True, why)
                        except:
                            print "NOP"
                classified, gibberish_score = classify_gibberish(body, site)
                if classified and gibberish_score >= 65:
                    GlobalVars.bayesian_testroom.send_message(
                        "[ SmokeDetector | GibberishClassifierBeta ] "
                        u"Potential gibberish answer ({}%): [{}]({}) on `{}`"
                        .format(gibberish_score, title, link, site))
        except:
            # Questions without answers lack the "answers" key entirely.
            print "no answers"
    return
def report(msg, urls):
    """
    Report a post (or posts) as spam.

    :param msg: the chat message object that triggered the command
    :param urls: whitespace-separated post URLs (at most 5 after dedup)
    :return: A string (or None)
    :raises CmdException: when rate-limited or given too many URLs
    """
    crn, wait = can_report_now(msg.owner.id, msg._client.host)
    if not crn:
        raise CmdException("You can execute the !!/report command again in {} seconds. "
                           "To avoid one user sending lots of reports in a few commands and "
                           "slowing SmokeDetector down due to rate-limiting, you have to "
                           "wait 30 seconds after you've reported multiple posts in "
                           "one go.".format(wait))
    output = []
    # Dedup; NOTE this also discards the original ordering of the URLs.
    urls = list(set(urls.split()))
    if len(urls) > 5:
        raise CmdException("To avoid SmokeDetector reporting posts too slowly, you can "
                           "report at most 5 posts at a time. This is to avoid "
                           "SmokeDetector's chat messages getting rate-limited too much, "
                           "which would slow down reports.")
    for index, url in enumerate(urls, start=1):
        post_data = api_get_post(url)
        if post_data is None:
            output.append("Post {}: That does not look like a valid post URL.".format(index))
            continue
        if post_data is False:
            output.append("Post {}: Could not find data for this post in the API. "
                          "It may already have been deleted.".format(index))
            continue
        if has_already_been_posted(post_data.site, post_data.post_id, post_data.title) and not is_false_positive(
                (post_data.post_id, post_data.site)):
            # Don't re-report if the post wasn't marked as a false positive. If it was marked as a false positive,
            # this re-report might be attempting to correct that/fix a mistake/etc.
            if GlobalVars.metasmoke_key is not None:
                se_link = to_protocol_relative(post_data.post_url)
                ms_link = "https://m.erwaysoftware.com/posts/by-url?url={}".format(se_link)
                output.append("Post {}: Already recently reported [ [MS]({}) ]".format(index, ms_link))
                continue
            else:
                output.append("Post {}: Already recently reported".format(index))
                continue
        post_data.is_answer = (post_data.post_type == "answer")
        post = Post(api_response=post_data.as_dict)
        user = get_user_from_url(post_data.owner_url)
        if user is not None:
            message_url = "https://chat.{}/transcript/{}?m={}".format(msg._client.host, msg.room.id, msg.id)
            add_blacklisted_user(user, message_url, post_data.post_url)
        why_info = u"Post manually reported by user *{}* in room *{}*.\n".format(msg.owner.name, msg.room.name)
        batch = ""
        if len(urls) > 1:
            batch = " (batch report: post {} out of {})".format(index, len(urls))
        handle_spam(post=post, reasons=["Manually reported " + post_data.post_type + batch], why=why_info)
    # Chained comparison: more than one URL AND at least one actually reported
    # (every failure appended to output, so urls > output means some succeeded).
    if 1 < len(urls) > len(output):
        add_or_update_multiple_reporter(msg.owner.id, msg._client.host, time.time())
    if len(output) > 0:
        return os.linesep.join(output)
def command_report_post(ev_room, ev_user_id, wrap2, message_parts, message_url, ev_user_name, ev_room_name,
                        *args, **kwargs):
    """
    Report a post (or posts)
    :param ev_room_name:
    :param ev_user_name:
    :param message_url:
    :param message_parts:
    :param wrap2:
    :param ev_user_id:
    :param ev_room:
    :param kwargs: No additional arguments expected
    :return: A string (or None)
    """
    crn, wait = can_report_now(ev_user_id, wrap2.host)
    if not crn:
        return Response(command_status=False,
                        message="You can execute the !!/report command again in {} seconds. "
                                "To avoid one user sending lots of reports in a few commands and "
                                "slowing SmokeDetector down due to rate-limiting, you have to "
                                "wait 30 seconds after you've reported multiple posts using "
                                "!!/report, even if your current command just has one URL. (Note "
                                "that this timeout won't be applied if you only used !!/report "
                                "for one post)".format(wait))
    if len(message_parts) < 2:
        return Response(command_status=False, message="Not enough arguments.")
    output = []
    index = 0
    # Dedup the URL arguments; ordering is not preserved.
    urls = list(set(message_parts[1:]))
    if len(urls) > 5:
        return Response(command_status=False,
                        message="To avoid SmokeDetector reporting posts too slowly, you can "
                                "report at most 5 posts at a time. This is to avoid "
                                "SmokeDetector's chat messages getting rate-limited too much, "
                                "which would slow down reports.")
    for url in urls:
        index += 1
        post_data = api_get_post(url)
        if post_data is None:
            output.append("Post {}: That does not look like a valid post URL.".format(index))
            continue
        if post_data is False:
            output.append("Post {}: Could not find data for this post in the API. "
                          "It may already have been deleted.".format(index))
            continue
        user = get_user_from_url(post_data.owner_url)
        if user is not None:
            add_blacklisted_user(user, message_url, post_data.post_url)
        why = u"Post manually reported by user *{}* in room *{}*.\n".format(ev_user_name,
                                                                            ev_room_name.decode('utf-8'))
        batch = ""
        if len(urls) > 1:
            batch = " (batch report: post {} out of {})".format(index, len(urls))
        handle_spam(title=post_data.title, body=post_data.body, poster=post_data.owner_name, site=post_data.site,
                    post_url=post_data.post_url, poster_url=post_data.owner_url, post_id=post_data.post_id,
                    reasons=["Manually reported " + post_data.post_type + batch],
                    is_answer=post_data.post_type == "answer", why=why, owner_rep=post_data.owner_rep,
                    post_score=post_data.score, up_vote_count=post_data.up_vote_count,
                    down_vote_count=post_data.down_vote_count, question_id=post_data.question_id)
    # More than one URL and at least one successfully reported.
    if 1 < len(urls) > len(output):
        add_or_update_multiple_reporter(ev_user_id, wrap2.host, time.time())
    if len(output) > 0:
        return Response(command_status=True, message=os.linesep.join(output))
    return Response(command_status=True, message=None)
def make_api_call_for_site(self, site):
    """
    Pop the queued question IDs for ``site``, fetch them from the Stack
    Exchange API (respecting quota/backoff bookkeeping in GlobalVars), then
    spam-check each question and its answers.

    Returns None; results are reported via handle_spam and chat messages.
    """
    self.queue_modify_lock.acquire()
    if site not in self.queue:
        GlobalVars.charcoal_hq.send_message(
            "Attempted API call to {} but there are no posts to fetch.".format(site))
        # NOTE(review): this returns while queue_modify_lock is still held —
        # looks like a deadlock risk; confirm and release before returning.
        return
    posts = self.queue.pop(site)
    store_bodyfetcher_queue()
    self.queue_modify_lock.release()
    question_modifier = ""
    pagesize_modifier = ""
    if site == "stackoverflow.com":
        # Not all SO questions are shown in the realtime feed. We now
        # fetch all recently modified SO questions to work around that.
        if self.last_activity_date != 0:
            pagesize = "50"
        else:
            pagesize = "25"
        pagesize_modifier = "&pagesize={pagesize}&min={time_length}".format(
            pagesize=pagesize, time_length=str(self.last_activity_date))
    else:
        question_modifier = "/{0}".format(";".join(str(post) for post in posts))
    url = "http://api.stackexchange.com/2.2/questions{q_modifier}?site={site}&filter=!)E0g*ODaEZ(SgULQhYvCYbu09*ss(bKFdnTrGmGUxnqPptuHP&key=IAkbitmze4B8KpacUfLqkw(({optional_min_query_param}".format(
        q_modifier=question_modifier, site=site, optional_min_query_param=pagesize_modifier)
    # wait to make sure API has/updates post data
    time.sleep(3)
    # Respect backoff, if we were given one
    if GlobalVars.api_backoff_time > time.time():
        time.sleep(GlobalVars.api_backoff_time - time.time() + 2)
    try:
        response = requests.get(url, timeout=20).json()
    except requests.exceptions.Timeout:
        return  # could add some retrying logic here, but eh.
    self.api_data_lock.acquire()
    add_or_update_api_data(site)
    self.api_data_lock.release()
    message_hq = ""
    if "quota_remaining" in response:
        # A jump of >= 5000 over the stored quota means the daily quota rolled over.
        if response["quota_remaining"] - GlobalVars.apiquota >= 5000 and GlobalVars.apiquota >= 0:
            GlobalVars.charcoal_hq.send_message(
                "API quota rolled over with {0} requests remaining. Current quota: {1}."
                .format(GlobalVars.apiquota, response["quota_remaining"]))
            sorted_calls_per_site = sorted(GlobalVars.api_calls_per_site.items(),
                                           key=itemgetter(1), reverse=True)
            api_quota_used_per_site = ""
            for site_name, quota_used in sorted_calls_per_site:
                api_quota_used_per_site += site_name.replace('.com', '').replace('.stackexchange', '') \
                    + ": {0}\n".format(str(quota_used))
            api_quota_used_per_site = api_quota_used_per_site.strip()
            GlobalVars.charcoal_hq.send_message(api_quota_used_per_site, False)
            clear_api_data()
        if response["quota_remaining"] == 0:
            GlobalVars.charcoal_hq.send_message("API reports no quota left! May be a glitch.")
            GlobalVars.charcoal_hq.send_message(str(response))  # No code format for now?
        if GlobalVars.apiquota == -1:
            # -1 is the sentinel value set at startup before any API call.
            GlobalVars.charcoal_hq.send_message(
                "Restart: API quota is {quota}.".format(quota=response["quota_remaining"]))
        GlobalVars.apiquota = response["quota_remaining"]
    else:
        message_hq = "The quota_remaining property was not in the API response."
    if "error_message" in response:
        message_hq += " Error: {}.".format(response["error_message"])
    if "backoff" in response:
        if GlobalVars.api_backoff_time < time.time() + response["backoff"]:
            GlobalVars.api_backoff_time = time.time() + response["backoff"]
        # Report only the API route (e.g. "questions"), not the full URL with key.
        match = regex.compile('/2.2/([^.]*)').search(url)
        url_part = match.group(1) if match else url
        message_hq += "\nBackoff received of {} seconds on request to `{}`".format(
            str(response["backoff"]), url_part)
    if len(message_hq) > 0:
        GlobalVars.charcoal_hq.send_message(message_hq.strip())
    if "items" not in response:
        return
    if site == "stackoverflow.com":
        # Remember the newest activity timestamp for the next incremental fetch.
        items = response["items"]
        if len(items) > 0 and "last_activity_date" in items[0]:
            self.last_activity_date = items[0]["last_activity_date"]
    for post in response["items"]:
        if "title" not in post or "body" not in post:
            continue
        title = GlobalVars.parser.unescape(post["title"])
        body = GlobalVars.parser.unescape(post["body"])
        link = post["link"]
        post_score = post["score"]
        up_vote_count = post["up_vote_count"]
        down_vote_count = post["down_vote_count"]
        try:
            owner_name = GlobalVars.parser.unescape(post["owner"]["display_name"])
            owner_link = post["owner"]["link"]
            owner_rep = post["owner"]["reputation"]
        except:
            # Deleted/anonymous owners may lack these fields; fall back to blanks.
            owner_name = ""
            owner_link = ""
            owner_rep = 0
        q_id = str(post["question_id"])
        is_spam, reason, why = check_if_spam(title, body, owner_name, owner_link,
                                             site, q_id, False, False, owner_rep, post_score)
        if is_spam:
            try:
                handle_spam(title, body, owner_name, site, link, owner_link, q_id, reason,
                            False, why, owner_rep, post_score, up_vote_count, down_vote_count, None)
            except:
                # Best-effort reporting: never let one failed report stop the scan.
                print "NOP"
        try:
            for answer in post["answers"]:
                answer_title = ""
                body = answer["body"]
                print "got answer from owner with name " + owner_name
                link = answer["link"]
                a_id = str(answer["answer_id"])
                post_score = answer["score"]
                up_vote_count = answer["up_vote_count"]
                down_vote_count = answer["down_vote_count"]
                try:
                    owner_name = GlobalVars.parser.unescape(answer["owner"]["display_name"])
                    owner_link = answer["owner"]["link"]
                    owner_rep = answer["owner"]["reputation"]
                except:
                    owner_name = ""
                    owner_link = ""
                    owner_rep = 0
                is_spam, reason, why = check_if_spam(answer_title, body, owner_name, owner_link,
                                                     site, a_id, True, False, owner_rep, post_score)
                if is_spam:
                    try:
                        handle_spam(title, body, owner_name, site, link, owner_link, a_id, reason,
                                    True, why, owner_rep, post_score, up_vote_count, down_vote_count, q_id)
                    except:
                        print "NOP"
        except:
            # Questions without answers lack the "answers" key entirely.
            print "no answers"
    return
def init_websocket(self):
    """
    Connect to the metasmoke websocket, subscribe to SmokeDetectorChannel and
    process frames forever.  On a per-frame error the connection is re-created
    (without the API key — presumably a degraded/anonymous re-subscribe;
    confirm intended) and the loop continues; on a connect failure the method
    simply logs and returns.
    """
    try:
        GlobalVars.metasmoke_ws = websocket.create_connection(GlobalVars.metasmoke_ws_host,
                                                              origin=GlobalVars.metasmoke_host)
        GlobalVars.metasmoke_ws.send(json.dumps({"command": "subscribe",
                                                 "identifier": "{\"channel\":\"SmokeDetectorChannel\",\"key\":\"" + GlobalVars.metasmoke_key + "\"}"}))
        while True:
            a = GlobalVars.metasmoke_ws.recv()
            print(a)
            try:
                data = json.loads(a)
                if "message" in data:
                    message = data['message']
                    if isinstance(message, Iterable):
                        if "message" in message:
                            # Plain chat relay: forward text verbatim to Charcoal HQ.
                            GlobalVars.charcoal_hq.send_message(message['message'])
                        elif "blacklist" in message:
                            datahandling.add_blacklisted_user((message['blacklist']['uid'],
                                                               message['blacklist']['site']),
                                                              "metasmoke", message['blacklist']['post'])
                        elif "naa" in message:
                            post_site_id = parsing.fetch_post_id_and_site_from_url(message["naa"]["post_link"])
                            datahandling.add_ignored_post(post_site_id[0:2])
                        elif "fp" in message:
                            post_site_id = parsing.fetch_post_id_and_site_from_url(message["fp"]["post_link"])
                            datahandling.add_false_positive(post_site_id[0:2])
                        elif "report" in message:
                            post_data = apigetpost.api_get_post(message["report"]["post_link"])
                            if post_data is None or post_data is False:
                                continue
                            # Skip re-reports unless the earlier report was marked false-positive.
                            if datahandling.has_already_been_posted(post_data.site, post_data.post_id,
                                                                    post_data.title) and not datahandling.is_false_positive((post_data.post_id, post_data.site)):
                                continue
                            user = parsing.get_user_from_url(post_data.owner_url)
                            if user is not None:
                                datahandling.add_blacklisted_user(user, "metasmoke", post_data.post_url)
                            why = u"Post manually reported by user *{}* from metasmoke.\n".format(
                                message["report"]["user"])
                            spamhandling.handle_spam(title=post_data.title, body=post_data.body,
                                                     poster=post_data.owner_name, site=post_data.site,
                                                     post_url=post_data.post_url,
                                                     poster_url=post_data.owner_url,
                                                     post_id=post_data.post_id,
                                                     reasons=["Manually reported " + post_data.post_type],
                                                     is_answer=post_data.post_type == "answer", why=why,
                                                     owner_rep=post_data.owner_rep,
                                                     post_score=post_data.score,
                                                     up_vote_count=post_data.up_vote_count,
                                                     down_vote_count=post_data.down_vote_count,
                                                     question_id=post_data.question_id)
                        elif "commit_status" in message:
                            c = message["commit_status"]
                            sha = c["commit_sha"][:7]
                            # Only act on commits other than the one currently checked out.
                            if c["commit_sha"] != os.popen('git log --pretty=format:"%H" -n 1').read():
                                if c["status"] == "success":
                                    if "autopull" in c["commit_message"]:
                                        GlobalVars.charcoal_hq.send_message("[CI]({ci_link}) on {commit_sha} succeeded. Message contains 'autopull', pulling...".format(ci_link=c["ci_url"], commit_sha=sha))
                                        time.sleep(2)
                                        # Exit code 3 signals the supervisor to pull and restart.
                                        os._exit(3)
                                    else:
                                        GlobalVars.charcoal_hq.send_message("[CI]({ci_link}) on {commit_sha} succeeded.".format(ci_link=c["ci_url"], commit_sha=sha))
                                elif c["status"] == "failure":
                                    GlobalVars.charcoal_hq.send_message("[CI]({ci_link}) on {commit_sha} failed.".format(ci_link=c["ci_url"], commit_sha=sha))
            except Exception, e:
                # Frame-level failure: re-create the connection and keep looping.
                GlobalVars.metasmoke_ws = websocket.create_connection(GlobalVars.metasmoke_ws_host,
                                                                      origin=GlobalVars.metasmoke_host)
                GlobalVars.metasmoke_ws.send(json.dumps({"command": "subscribe",
                                                         "identifier": "{\"channel\":\"SmokeDetectorChannel\"}"}))
                print e
                try:
                    exc_info = sys.exc_info()
                    traceback.print_exception(*exc_info)
                except:
                    print "meh"
    except:
        print "Couldn't bind to MS websocket"
def handle_websocket_data(data):
    """
    Dispatch one metasmoke websocket payload (variant with blacklist-only
    deploy handling: a deploy that touches only blacklist files is validated
    and hot-reloaded without restarting).

    Returns None; all effects are side effects on GlobalVars / datahandling /
    git, or process exit.
    """
    if "message" not in data:
        return
    message = data['message']
    if isinstance(message, Iterable):
        if "message" in message:
            # Plain chat relay: forward text verbatim to Charcoal HQ.
            GlobalVars.charcoal_hq.send_message(message['message'])
        elif "exit" in message:
            # Remote kill switch: exit with the requested status code.
            os._exit(message["exit"])
        elif "blacklist" in message:
            datahandling.add_blacklisted_user(
                (message['blacklist']['uid'], message['blacklist']['site']),
                "metasmoke", message['blacklist']['post'])
        elif "naa" in message:
            post_site_id = parsing.fetch_post_id_and_site_from_url(
                message["naa"]["post_link"])
            datahandling.add_ignored_post(post_site_id[0:2])
        elif "fp" in message:
            post_site_id = parsing.fetch_post_id_and_site_from_url(
                message["fp"]["post_link"])
            datahandling.add_false_positive(post_site_id[0:2])
        elif "report" in message:
            post_data = apigetpost.api_get_post(message["report"]["post_link"])
            if post_data is None or post_data is False:
                return
            # Skip re-reports unless the earlier report was marked false-positive.
            if datahandling.has_already_been_posted(post_data.site, post_data.post_id, post_data.title) \
                    and not datahandling.is_false_positive((post_data.post_id, post_data.site)):
                return
            user = parsing.get_user_from_url(post_data.owner_url)
            if user is not None:
                datahandling.add_blacklisted_user(user, "metasmoke", post_data.post_url)
            why = u"Post manually reported by user *{}* from metasmoke.\n".format(
                message["report"]["user"])
            # Rebuild an API-shaped dict so classes.Post can consume it.
            # NOTE(review): 'question_id' is fed post_data.post_id — for answers
            # this is the answer id, not the parent question; confirm intended.
            postobj = classes.Post(
                api_response={
                    'title': post_data.title,
                    'body': post_data.body,
                    'owner': {
                        'display_name': post_data.owner_name,
                        'reputation': post_data.owner_rep,
                        'link': post_data.owner_url
                    },
                    'site': post_data.site,
                    'IsAnswer': (post_data.post_type == "answer"),
                    'score': post_data.score,
                    'link': post_data.post_url,
                    'question_id': post_data.post_id,
                    'up_vote_count': post_data.up_vote_count,
                    'down_vote_count': post_data.down_vote_count
                })
            spamhandling.handle_spam(
                post=postobj,
                reasons=["Manually reported " + post_data.post_type],
                why=why)
        elif "deploy_updated" in message:
            sha = message["deploy_updated"]["head_commit"]["id"]
            # Only act on commits other than the one currently checked out.
            if sha != os.popen('git log --pretty=format:"%H" -n 1').read():
                if "autopull" in message["deploy_updated"]["head_commit"]["message"]:
                    if only_blacklists_changed(GitManager.get_remote_diff()):
                        commit_md = "[`{0}`](https://github.com/Charcoal-SE/SmokeDetector/commit/{0})" \
                            .format(sha[:7])
                        i = []  # Currently no issues with backlists
                        for bl_file in glob('bad_*.txt') + glob('blacklisted_*.txt'):  # Check blacklists for issues
                            with open(bl_file, 'r') as lines:
                                seen = dict()
                                for lineno, line in enumerate(lines, 1):
                                    if line.endswith('\r\n'):
                                        i.append("DOS line ending at `{0}:{1}` in {2}".format(
                                            bl_file, lineno, commit_md))
                                    if not line.endswith('\n'):
                                        i.append("No newline at end of `{0}` in {1}".format(
                                            bl_file, commit_md))
                                    if line == '\n':
                                        i.append("Blank line at `{0}:{1}` in {2}".format(
                                            bl_file, lineno, commit_md))
                                    if line in seen:
                                        i.append("Duplicate entry of {0} at lines {1} and {2} of {3} in {4}"
                                                 .format(line.rstrip('\n'), seen[line], lineno,
                                                         bl_file, commit_md))
                                    seen[line] = lineno
                        if i == []:  # No issues
                            # Safe to hot-reload: pull and re-read blacklists in place.
                            GitManager.pull_remote()
                            load_blacklists()
                            GlobalVars.charcoal_hq.send_message(
                                "No code modified in {0}, only blacklists"
                                " reloaded.".format(commit_md))
                        else:
                            i.append("please fix before pulling.")
                            GlobalVars.charcoal_hq.send_message(", ".join(i))
        elif "commit_status" in message:
            c = message["commit_status"]
            sha = c["commit_sha"][:7]
            if c["commit_sha"] != os.popen('git log --pretty=format:"%H" -n 1').read():
                if c["status"] == "success":
                    if "autopull" in c["commit_message"]:
                        s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/Charcoal-SE/SmokeDetector/" \
                            "commit/{commit_sha})"\
                            " succeeded. Message contains 'autopull', pulling...".format(ci_link=c["ci_url"],
                                                                                        commit_sha=sha)
                        GlobalVars.charcoal_hq.send_message(s)
                        time.sleep(2)
                        # Exit code 3 signals the supervisor to pull and restart.
                        os._exit(3)
                    else:
                        s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/Charcoal-SE/SmokeDetector/" \
                            "commit/{commit_sha}) succeeded.".format(ci_link=c["ci_url"], commit_sha=sha)
                elif c["status"] == "failure":
                    s = "[CI]({ci_link}) on [`{commit_sha}`](https://github.com/Charcoal-SE/SmokeDetector/" \
                        "commit/{commit_sha}) failed.".format(ci_link=c["ci_url"], commit_sha=sha)
                # noinspection PyUnboundLocalVariable
                GlobalVars.charcoal_hq.send_message(s)
        elif "everything_is_broken" in message:
            if message["everything_is_broken"] is True:
                os._exit(6)
def make_api_call_for_site(self, site): posts = self.queue.pop(site) url = "http://api.stackexchange.com/2.2/questions/" + ";".join(str(x) for x in posts) + "?site=" + site + "&filter=!4y_-sca-)pfAwlmP_1FxC6e5yzutRIcQvonAiP&key=IAkbitmze4B8KpacUfLqkw((" # wait to make sure API has/updates post data time.sleep(60) try: response = requests.get(url, timeout=20).json() except requests.exceptions.Timeout: return # could add some retrying logic here, but eh. if "quota_remaining" in response: GlobalVars.apiquota = response["quota_remaining"] else: GlobalVars.apiquota = 0 return for post in response["items"]: if "title" not in post or "body" not in post: continue title = GlobalVars.parser.unescape(post["title"]) body = GlobalVars.parser.unescape(post["body"]) link = post["link"] try: owner_name = GlobalVars.parser.unescape(post["owner"]["display_name"]) owner_link = post["owner"]["link"] owner_rep = post["owner"]["reputation"] except: owner_name = "" owner_link = "" owner_rep = 0 q_id = str(post["question_id"]) is_spam, reason = check_if_spam(title, body, owner_name, owner_link, site, q_id, False, False) if owner_rep <= 50 and is_spam: try: handle_spam(title, owner_name, site, link, owner_link, q_id, reason, False) except: print "NOP" classified, gibberish_score = classify_gibberish(body, site) if classified and gibberish_score >= 65: GlobalVars.bayesian_testroom.send_message( "[ SmokeDetector | GibberishClassifierBeta ] " "Potential gibberish body (%s%%): [%s](%s) on `%s`" % (gibberish_score, title, link, site) ) try: for answer in post["answers"]: answer_title = "" body = answer["body"] print "got answer from owner with name " + owner_name link = answer["link"] a_id = str(answer["answer_id"]) try: owner_name = GlobalVars.parser.unescape(answer["owner"]["display_name"]) owner_link = answer["owner"]["link"] owner_rep = answer["owner"]["reputation"] except: owner_name = "" owner_link = "" owner_rep = 0 is_spam, reason = check_if_spam(answer_title, body, owner_name, 
owner_link, site, a_id, True, False) if owner_rep <= 50 and is_spam: try: handle_spam(title, owner_name, site, link, owner_link, a_id, reason, True) except: print "NOP" classified, gibberish_score = classify_gibberish(body, site) if classified and gibberish_score >= 65: GlobalVars.bayesian_testroom.send_message( "[ SmokeDetector | GibberishClassifierBeta ] " "Potential gibberish answer (%s%%): [%s](%s) on `%s`" % (gibberish_score, title, link, site) ) except: print "no answers" return
def handle_commands(content_lower, message_parts, ev_room, ev_room_name, ev_user_id, ev_user_name, wrap2, content, message_id):
    """Dispatch a chat `!!/...` command and return the bot's reply.

    :param content_lower: lowercased message text, used for command matching
    :param message_parts: whitespace-split message tokens (command + args)
    :param ev_room, ev_room_name, ev_user_id, ev_user_name: chat event data
    :param wrap2: chat client wrapper; `wrap2.host` is the chat domain
    :param content: original (case-preserving) message text
    :param message_id: id of the triggering message (unused in the branches
        visible here)
    :returns: a reply string, None (handled silently), or a
        ``(False, error_message)`` tuple; ``(False, None)`` means
        "unrecognized command".
    """
    # NOTE(review): this branch appears truncated/redacted in the source —
    # the URL credentials were scrubbed to "*****:*****" and the statements
    # between this requests.get call and the "!!/tea" branch are missing.
    # Kept byte-for-byte; recover the original from version control.
    if content_lower.startswith("!!/parse") \
            and is_privileged(ev_room, ev_user_id, wrap2):
        string_to_parse = content[9:]
        print string_to_parse
        response = requests.get("http://*****:*****@" + ev_user_name.replace(" ", "") + "*"
    if content_lower.startswith("!!/tea"):
        return "*brews a cup of {choice} tea for @{user}*".format(choice=random.choice(['earl grey', 'green', 'chamomile', 'lemon', 'darjeeling', 'mint', 'jasmine']), user=ev_user_name.replace(" ", ""))
    if content_lower.startswith("!!/brownie"):
        return "Brown!"
    if content_lower.startswith("!!/hats"):
        # Countdown to the end of the Winterbash 2015/16 hat event.
        wb_end = datetime(2016, 1, 4, 0, 0, 0)
        now = datetime.utcnow()
        if wb_end > now:
            diff = wb_end - now
            hours, remainder = divmod(diff.seconds, 3600)
            minutes, seconds = divmod(remainder, 60)
            # Pluralize each unit independently.
            daystr = "days" if diff.days != 1 else "day"
            hourstr = "hours" if hours != 1 else "hour"
            minutestr = "minutes" if minutes != 1 else "minute"
            secondstr = "seconds" if seconds != 1 else "second"
            return "HURRY UP AND EARN MORE HATS! Winterbash will be over in {} {}, {} {}, {} {}, and {} {}. :(".format(diff.days, daystr, hours, hourstr, minutes, minutestr, seconds, secondstr)
        return "Winterbash is over. :("
    if content_lower.startswith("!!/test"):
        string_to_test = content[8:]
        test_as_answer = False
        # "!!/test-a" shares the "!!/test" prefix, so re-check for the
        # answer variant and re-slice the payload accordingly.
        if content_lower.startswith("!!/test-a"):
            string_to_test = content[10:]
            test_as_answer = True
        if len(string_to_test) == 0:
            return "Nothing to test"
        result = "> "
        # The same string is passed as title, body and username so every
        # rule category gets a chance to match.
        reasons, why = FindSpam.test_post(string_to_test, string_to_test, string_to_test, "", test_as_answer, False, 1, 0)
        if len(reasons) == 0:
            result += "Would not be caught for title, {}, and username.".format("answer" if test_as_answer else "body")
            return result
        result += ", ".join(reasons).capitalize()
        if why is not None and len(why) > 0:
            result += "\n----------\n"
            result += why
        return result
    if content_lower.startswith("!!/amiprivileged"):
        if is_privileged(ev_room, ev_user_id, wrap2):
            return "Yes, you are a privileged user."
        return "No, you are not a privileged user."
    if content_lower.startswith("!!/notify"):
        if len(message_parts) != 3:
            return False, "2 arguments expected"
        user_id = int(ev_user_id)
        chat_site = wrap2.host
        room_id = message_parts[1]
        if not room_id.isdigit():
            return False, "Room ID is invalid."
        room_id = int(room_id)
        # A "-" in the site argument means: act quietly (no confirmation).
        quiet_action = ("-" in message_parts[2])
        se_site = message_parts[2].replace('-', '')
        response, full_site = add_to_notification_list(user_id, chat_site, room_id, se_site)
        if response == 0:
            if quiet_action:
                return None
            return "You'll now get pings from me if I report a post on `{site_name}`, in room `{room_id}` on `chat.{chat_domain}`".format(site_name=se_site, room_id=room_id, chat_domain=chat_site)
        elif response == -1:
            return "That notification configuration is already registered."
        elif response == -2:
            return False, "The given SE site does not exist."
    if content_lower.startswith("!!/unnotify"):
        if len(message_parts) != 3:
            return False, "2 arguments expected"
        user_id = int(ev_user_id)
        chat_site = wrap2.host
        room_id = message_parts[1]
        if not room_id.isdigit():
            return False, "Room ID is invalid."
        room_id = int(room_id)
        quiet_action = ("-" in message_parts[2])
        se_site = message_parts[2].replace('-', '')
        response = remove_from_notification_list(user_id, chat_site, room_id, se_site)
        if response:
            if quiet_action:
                return None
            return "I will no longer ping you if I report a post on `{site_name}`, in room `{room_id}` on `chat.{chat_domain}`".format(site_name=se_site, room_id=room_id, chat_domain=chat_site)
        return "That configuration doesn't exist."
    if content_lower.startswith("!!/willibenotified"):
        if len(message_parts) != 3:
            return False, "2 arguments expected"
        user_id = int(ev_user_id)
        chat_site = wrap2.host
        room_id = message_parts[1]
        if not room_id.isdigit():
            return False, "Room ID is invalid"
        room_id = int(room_id)
        se_site = message_parts[2]
        will_be_notified = will_i_be_notified(user_id, chat_site, room_id, se_site)
        if will_be_notified:
            return "Yes, you will be notified for that site in that room."
        return "No, you won't be notified for that site in that room."
    if content_lower.startswith("!!/allnotificationsites"):
        if len(message_parts) != 2:
            return False, "1 argument expected"
        user_id = int(ev_user_id)
        chat_site = wrap2.host
        room_id = message_parts[1]
        if not room_id.isdigit():
            return False, "Room ID is invalid."
        sites = get_all_notification_sites(user_id, chat_site, room_id)
        if len(sites) == 0:
            return "You won't get notified for any sites in that room."
        return "You will get notified for these sites:\r\n" + ", ".join(sites)
    return False, None  # Unrecognized command, can be edited later.
def make_api_call_for_site(self, site):
    """Fetch queued posts for `site` from the SE API and scan them for spam.

    For stackoverflow.com the queued IDs are ignored and recently-active
    questions are fetched instead (the realtime feed misses some SO posts);
    for all other sites the queued question IDs are requested directly.
    Each question and answer is spam-checked (only for authors with <= 50
    reputation) and run through the gibberish classifier. All results are
    reported via side effects; the method returns None.

    :param site: API site parameter, e.g. "stackoverflow.com"
    """
    posts = self.queue.pop(site)
    if site == "stackoverflow.com":
        # Not all SO questions are shown in the realtime feed. We now
        # fetch all recently modified SO questions to work around that.
        min_query = ""
        if self.last_activity_date != 0:
            # Only posts active since the last batch; larger page to cover them.
            min_query = "&min=" + str(self.last_activity_date)
            pagesize = "50"
        else:
            pagesize = "25"
        url = "http://api.stackexchange.com/2.2/questions?site=stackoverflow&filter=!4y_-sca-)pfAwlmP_1FxC6e5yzutRIcQvonAiP&key=IAkbitmze4B8KpacUfLqkw((&pagesize=" + pagesize + min_query
    else:
        url = "http://api.stackexchange.com/2.2/questions/" + ";".join(str(x) for x in posts) + "?site=" + site + "&filter=!4y_-sca-)pfAwlmP_1FxC6e5yzutRIcQvonAiP&key=IAkbitmze4B8KpacUfLqkw(("
    # wait to make sure API has/updates post data
    time.sleep(60)
    try:
        response = requests.get(url, timeout=20).json()
    except requests.exceptions.Timeout:
        return  # could add some retrying logic here, but eh.
    if "quota_remaining" in response:
        GlobalVars.apiquota = response["quota_remaining"]
    else:
        # Missing quota field: record exhaustion and abandon this batch.
        GlobalVars.apiquota = 0
        return
    if site == "stackoverflow.com":
        # Remember the newest activity timestamp for the next min= query.
        if len(response["items"]) > 0 and "last_activity_date" in response["items"][0]:
            self.last_activity_date = response["items"][0]["last_activity_date"]
    for post in response["items"]:
        if "title" not in post or "body" not in post:
            continue  # nothing scannable in this item
        title = GlobalVars.parser.unescape(post["title"])
        body = GlobalVars.parser.unescape(post["body"])
        link = post["link"]
        try:
            owner_name = GlobalVars.parser.unescape(post["owner"]["display_name"])
            owner_link = post["owner"]["link"]
            owner_rep = post["owner"]["reputation"]
        except:
            # Owner fields may be absent (deleted/anonymous user).
            owner_name = ""
            owner_link = ""
            owner_rep = 0
        q_id = str(post["question_id"])
        # Reputation gate moved before the check: only <= 50 rep is scanned.
        if owner_rep <= 50:
            is_spam, reason, why = check_if_spam(title, body, owner_name, owner_link, site, q_id, False, False)
            if is_spam:
                try:
                    handle_spam(title, body, owner_name, site, link, owner_link, q_id, reason, False, why)
                except:
                    print "NOP"  # reporting is best-effort; keep scanning
        classified, gibberish_score = classify_gibberish(body, site)
        if classified and gibberish_score >= 65:
            GlobalVars.bayesian_testroom.send_message(
                "[ SmokeDetector | GibberishClassifierBeta ] "
                u"Potential gibberish body ({}%): [{}]({}) on `{}`".format(gibberish_score, title, link, site))
    try:
        # NOTE: `body`, `link` and the owner_* variables are reused inside
        # this loop, while the question's `title` still feeds handle_spam.
        for answer in post["answers"]:
            answer_title = ""
            body = answer["body"]
            print "got answer from owner with name " + owner_name
            link = answer["link"]
            a_id = str(answer["answer_id"])
            try:
                owner_name = GlobalVars.parser.unescape(answer["owner"]["display_name"])
                owner_link = answer["owner"]["link"]
                owner_rep = answer["owner"]["reputation"]
            except:
                owner_name = ""
                owner_link = ""
                owner_rep = 0
            if owner_rep <= 50:
                is_spam, reason, why = check_if_spam(answer_title, body, owner_name, owner_link, site, a_id, True, False)
                if is_spam:
                    try:
                        handle_spam(title, body, owner_name, site, link, owner_link, a_id, reason, True, why)
                    except:
                        print "NOP"
            classified, gibberish_score = classify_gibberish(body, site)
            if classified and gibberish_score >= 65:
                GlobalVars.bayesian_testroom.send_message(
                    "[ SmokeDetector | GibberishClassifierBeta ] "
                    u"Potential gibberish answer ({}%): [{}]({}) on `{}`".format(gibberish_score, title, link, site))
    except:
        # Questions without an "answers" key land here (best-effort).
        print "no answers"
    return
def make_api_call_for_site(self, site):
    """Pop the queued posts for `site`, fetch them from the SE API under the
    global API lock (respecting any backoff), maintain quota bookkeeping,
    then spam-check every question and answer, recording scan statistics.

    Failed requests push the posts back onto the queue for a later retry.
    All results are reported via side effects; the method returns None.

    :param site: API site parameter, e.g. "stackoverflow.com"
    """
    if site not in self.queue:
        return
    # Pop under the queue lock and persist the smaller queue.
    self.queue_modify_lock.acquire()
    posts = self.queue.pop(site)
    store_bodyfetcher_queue()
    self.queue_modify_lock.release()
    question_modifier = ""
    pagesize_modifier = ""
    if site == "stackoverflow.com":
        # Not all SO questions are shown in the realtime feed. We now
        # fetch all recently modified SO questions to work around that.
        if self.last_activity_date != 0:
            pagesize = "50"
        else:
            pagesize = "25"
        pagesize_modifier = "&pagesize={pagesize}" \
                            "&min={time_length}".format(pagesize=pagesize, time_length=str(self.last_activity_date))
    else:
        question_modifier = "/{0}".format(";".join(str(post) for post in posts))
    url = "https://api.stackexchange.com/2.2/questions{q_modifier}?site={site}" \
          "&filter=!)E0g*ODaEZ(SgULQhYvCYbu09*ss(bKFdnTrGmGUxnqPptuHP&key=IAkbitmze4B8KpacUfLqkw((" \
          "{optional_min_query_param}".format(q_modifier=question_modifier, site=site,
                                             optional_min_query_param=pagesize_modifier)
    # wait to make sure API has/updates post data
    time.sleep(3)
    GlobalVars.api_request_lock.acquire()
    # Respect backoff, if we were given one
    if GlobalVars.api_backoff_time > time.time():
        time.sleep(GlobalVars.api_backoff_time - time.time() + 2)
    try:
        time_request_made = datetime.now().strftime('%H:%M:%S')
        response = requests.get(url, timeout=20).json()
    except (requests.exceptions.Timeout, requests.ConnectionError, Exception):
        # Any failure in the request being made (timeout or otherwise) should be added back to
        # the queue.
        # NOTE(review): this path does not release api_request_lock before
        # returning — looks like a potential deadlock; confirm against the
        # later revision, which adds the release.
        self.queue_modify_lock.acquire()
        if site in self.queue:
            self.queue[site].extend(posts)
        else:
            self.queue[site] = posts
        self.queue_modify_lock.release()
        return
    self.api_data_lock.acquire()
    add_or_update_api_data(site)
    self.api_data_lock.release()
    message_hq = ""
    if "quota_remaining" in response:
        # A jump of >= 5000 remaining requests means the daily quota rolled
        # over; report per-site usage and reset the counters.
        if response["quota_remaining"] - GlobalVars.apiquota >= 5000 and GlobalVars.apiquota >= 0:
            GlobalVars.charcoal_hq.send_message("API quota rolled over with {0} requests remaining. "
                                                "Current quota: {1}.".format(GlobalVars.apiquota,
                                                                             response["quota_remaining"]))
            sorted_calls_per_site = sorted(GlobalVars.api_calls_per_site.items(), key=itemgetter(1), reverse=True)
            api_quota_used_per_site = ""
            for site_name, quota_used in sorted_calls_per_site:
                sanatized_site_name = site_name.replace('.com', '').replace('.stackexchange', '')
                api_quota_used_per_site += sanatized_site_name + ": {0}\n".format(str(quota_used))
            api_quota_used_per_site = api_quota_used_per_site.strip()
            GlobalVars.charcoal_hq.send_message(api_quota_used_per_site, False)
            clear_api_data()
        if response["quota_remaining"] == 0:
            GlobalVars.charcoal_hq.send_message("API reports no quota left! May be a glitch.")
            GlobalVars.charcoal_hq.send_message(str(response))  # No code format for now?
        if GlobalVars.apiquota == -1:
            # -1 is the startup sentinel: first successful call after restart.
            GlobalVars.charcoal_hq.send_message("Restart: API quota is {quota}."
                                                .format(quota=response["quota_remaining"]))
        GlobalVars.apiquota = response["quota_remaining"]
    else:
        message_hq = "The quota_remaining property was not in the API response."
    if "error_message" in response:
        message_hq += " Error: {} at {} UTC.".format(response["error_message"], time_request_made)
        if "error_id" in response and response["error_id"] == 502:
            if GlobalVars.api_backoff_time < time.time() + 12:  # Add a backoff of 10 + 2 seconds as a default
                GlobalVars.api_backoff_time = time.time() + 12
            message_hq += " Backing off on requests for the next 12 seconds."
    if "backoff" in response:
        # Honor an explicit backoff from the API if it extends the current one.
        if GlobalVars.api_backoff_time < time.time() + response["backoff"]:
            GlobalVars.api_backoff_time = time.time() + response["backoff"]
    GlobalVars.api_request_lock.release()
    if len(message_hq) > 0:
        GlobalVars.charcoal_hq.send_message(message_hq.strip())
    if "items" not in response:
        return
    if site == "stackoverflow.com":
        # Remember the newest activity timestamp for the next min= query.
        items = response["items"]
        if len(items) > 0 and "last_activity_date" in items[0]:
            self.last_activity_date = items[0]["last_activity_date"]
    num_scanned = 0
    start_time = time.time()
    for post in response["items"]:
        if "title" not in post or "body" not in post:
            continue  # nothing scannable in this item
        num_scanned += 1
        title = GlobalVars.parser.unescape(post["title"])
        body = GlobalVars.parser.unescape(post["body"])
        link = post["link"]
        post_score = post["score"]
        up_vote_count = post["up_vote_count"]
        down_vote_count = post["down_vote_count"]
        try:
            owner_name = GlobalVars.parser.unescape(post["owner"]["display_name"])
            owner_link = post["owner"]["link"]
            owner_rep = post["owner"]["reputation"]
        except:
            # Owner fields may be absent (deleted/anonymous user).
            owner_name = ""
            owner_link = ""
            owner_rep = 0
        q_id = str(post["question_id"])
        is_spam, reason, why = check_if_spam(title=title, body=body, user_name=owner_name, user_url=owner_link,
                                             post_site=site, post_id=q_id, is_answer=False, body_is_summary=False,
                                             owner_rep=owner_rep, post_score=post_score)
        if is_spam:
            try:
                handle_spam(title=title, body=body, poster=owner_name, site=site, post_url=link,
                            poster_url=owner_link, post_id=q_id, reasons=reason, is_answer=False,
                            why=why, owner_rep=owner_rep, post_score=post_score,
                            up_vote_count=up_vote_count, down_vote_count=down_vote_count, question_id=None)
            except:
                print "NOP"  # reporting is best-effort; keep scanning
        try:
            # NOTE: `body`, `link`, vote counts and the owner_* variables are
            # reused inside this loop; `title` still names the question.
            for answer in post["answers"]:
                num_scanned += 1
                answer_title = ""
                body = answer["body"]
                print "got answer from owner with name " + owner_name
                link = answer["link"]
                a_id = str(answer["answer_id"])
                post_score = answer["score"]
                up_vote_count = answer["up_vote_count"]
                down_vote_count = answer["down_vote_count"]
                try:
                    owner_name = GlobalVars.parser.unescape(answer["owner"]["display_name"])
                    owner_link = answer["owner"]["link"]
                    owner_rep = answer["owner"]["reputation"]
                except:
                    owner_name = ""
                    owner_link = ""
                    owner_rep = 0
                is_spam, reason, why = check_if_spam(title=answer_title, body=body, user_name=owner_name,
                                                     user_url=owner_link, post_site=site, post_id=a_id,
                                                     is_answer=True, body_is_summary=False,
                                                     owner_rep=owner_rep, post_score=post_score)
                if is_spam:
                    try:
                        handle_spam(title=title, body=body, poster=owner_name, site=site, post_url=link,
                                    poster_url=owner_link, post_id=a_id, reasons=reason, is_answer=True,
                                    why=why, owner_rep=owner_rep, post_score=post_score,
                                    up_vote_count=up_vote_count, down_vote_count=down_vote_count,
                                    question_id=q_id)
                    except:
                        print "NOP"
        except:
            # Questions without an "answers" key land here (best-effort).
            print "no answers"
    end_time = time.time()
    # Fold this batch into the global scan statistics under their lock.
    GlobalVars.posts_scan_stats_lock.acquire()
    GlobalVars.num_posts_scanned += num_scanned
    GlobalVars.post_scan_time += end_time - start_time
    GlobalVars.posts_scan_stats_lock.release()
    return
def init_websocket(self):
    """Connect to the metasmoke ActionCable websocket and process events forever.

    Subscribes to SmokeDetectorChannel (authenticated with the metasmoke
    key), then loops over incoming frames and dispatches on the message
    shape: chat relays, user blacklisting, NAA/FP feedback, manual reports,
    and CI commit-status notifications. On any per-message error the
    connection is re-created (note: the re-subscribe omits the key) and the
    loop continues; a failure to connect at all is reported and the method
    returns. Runs until the process exits; returns None.
    """
    try:
        GlobalVars.metasmoke_ws = websocket.create_connection(GlobalVars.metasmoke_ws_host,
                                                              origin=GlobalVars.metasmoke_host)
        GlobalVars.metasmoke_ws.send(json.dumps({
            "command": "subscribe",
            "identifier": "{\"channel\":\"SmokeDetectorChannel\",\"key\":\"" + GlobalVars.metasmoke_key + "\"}"}))
        while True:
            a = GlobalVars.metasmoke_ws.recv()
            print(a)
            try:
                data = json.loads(a)
                if "message" in data:
                    message = data['message']
                    # Non-dict payloads (e.g. ping frames) are skipped by the
                    # Iterable check below.
                    if isinstance(message, Iterable):
                        if "message" in message:
                            # Plain text relay to Charcoal HQ.
                            GlobalVars.charcoal_hq.send_message(message['message'])
                        elif "blacklist" in message:
                            datahandling.add_blacklisted_user(
                                (message['blacklist']['uid'], message['blacklist']['site']),
                                "metasmoke", message['blacklist']['post'])
                        elif "naa" in message:
                            # "Not an answer" feedback: stop re-reporting the post.
                            post_site_id = parsing.fetch_post_id_and_site_from_url(message["naa"]["post_link"])
                            datahandling.add_ignored_post(post_site_id[0:2])
                        elif "fp" in message:
                            # False-positive feedback.
                            post_site_id = parsing.fetch_post_id_and_site_from_url(message["fp"]["post_link"])
                            datahandling.add_false_positive(post_site_id[0:2])
                        elif "report" in message:
                            # Manual report: fetch the post and push it through
                            # the normal spam-handling pipeline.
                            post_data = apigetpost.api_get_post(message["report"]["post_link"])
                            if post_data is None or post_data is False:
                                continue
                            # Skip if already reported, unless it was an FP before.
                            if datahandling.has_already_been_posted(post_data.site, post_data.post_id,
                                                                    post_data.title) \
                                    and not datahandling.is_false_positive((post_data.post_id, post_data.site)):
                                continue
                            user = parsing.get_user_from_url(post_data.owner_url)
                            if user is not None:
                                datahandling.add_blacklisted_user(user, "metasmoke", post_data.post_url)
                            why = u"Post manually reported by user *{}* from metasmoke.\n".format(
                                message["report"]["user"])
                            spamhandling.handle_spam(title=post_data.title,
                                                     body=post_data.body,
                                                     poster=post_data.owner_name,
                                                     site=post_data.site,
                                                     post_url=post_data.post_url,
                                                     poster_url=post_data.owner_url,
                                                     post_id=post_data.post_id,
                                                     reasons=["Manually reported " + post_data.post_type],
                                                     is_answer=post_data.post_type == "answer",
                                                     why=why,
                                                     owner_rep=post_data.owner_rep,
                                                     post_score=post_data.score,
                                                     up_vote_count=post_data.up_vote_count,
                                                     down_vote_count=post_data.down_vote_count,
                                                     question_id=post_data.question_id)
                        elif "commit_status" in message:
                            c = message["commit_status"]
                            sha = c["commit_sha"][:7]
                            # Only react to commits other than the one we run.
                            if c["commit_sha"] != os.popen('git log --pretty=format:"%H" -n 1').read():
                                if c["status"] == "success":
                                    if "autopull" in c["commit_message"]:
                                        GlobalVars.charcoal_hq.send_message(
                                            "[CI]({ci_link}) on {commit_sha} succeeded. Message contains 'autopull', pulling...".format(ci_link=c["ci_url"], commit_sha=sha))
                                        time.sleep(2)
                                        # Exit code 3 signals the wrapper script to pull & restart.
                                        os._exit(3)
                                    else:
                                        GlobalVars.charcoal_hq.send_message(
                                            "[CI]({ci_link}) on {commit_sha} succeeded.".format(ci_link=c["ci_url"], commit_sha=sha))
                                elif c["status"] == "failure":
                                    GlobalVars.charcoal_hq.send_message(
                                        "[CI]({ci_link}) on {commit_sha} failed.".format(ci_link=c["ci_url"], commit_sha=sha))
            except Exception, e:
                # Assume the connection broke: reconnect and re-subscribe
                # (note: without the key this time), then log the error.
                GlobalVars.metasmoke_ws = websocket.create_connection(GlobalVars.metasmoke_ws_host,
                                                                      origin=GlobalVars.metasmoke_host)
                GlobalVars.metasmoke_ws.send(json.dumps({
                    "command": "subscribe",
                    "identifier": "{\"channel\":\"SmokeDetectorChannel\"}"}))
                print e
                try:
                    exc_info = sys.exc_info()
                    traceback.print_exception(*exc_info)
                except:
                    print "meh"
    except:
        print "Couldn't bind to MS websocket"
def make_api_call_for_site(self, site):
    """Pop the queued posts for `site`, fetch them (plus intermediate IDs)
    from the SE API under the global API lock, and scan every question and
    answer as `Post` objects via check_if_spam/handle_spam.

    Also records per-post queue-dwell timings, maintains the per-site max
    post ID (to fill in IDs the realtime feed skipped), performs quota and
    backoff bookkeeping, emits Flovis debugging stages when enabled, and
    accumulates scan statistics. Failed requests push the posts back onto
    the queue. Returns None; all output happens through side effects.

    :param site: API site parameter, e.g. "stackoverflow.com"
    """
    if site not in self.queue:
        return
    # Pop under the queue lock and persist the smaller queue.
    # `new_posts` maps post id (str) -> time the post was enqueued.
    self.queue_modify_lock.acquire()
    new_posts = self.queue.pop(site)
    store_bodyfetcher_queue()
    self.queue_modify_lock.release()
    new_post_ids = [int(k) for k in new_posts.keys()]
    if GlobalVars.flovis is not None:
        for post_id in new_post_ids:
            GlobalVars.flovis.stage('bodyfetcher/api_request', site, post_id,
                                    {'site': site, 'posts': list(new_posts.keys())})
    # Record how long each post sat in the queue before this fetch.
    self.queue_timing_modify_lock.acquire()
    post_add_times = [v for k, v in new_posts.items()]
    pop_time = datetime.utcnow()
    for add_time in post_add_times:
        try:
            seconds_in_queue = (pop_time - add_time).total_seconds()
            if site in self.queue_timings:
                self.queue_timings[site].append(seconds_in_queue)
            else:
                self.queue_timings[site] = [seconds_in_queue]
        except KeyError:  # XXX: Any other possible exception?
            continue  # Skip to next item if we've got invalid data or missing values.
    store_queue_timings()
    self.queue_timing_modify_lock.release()
    # Widen the fetch to IDs between the previous max and the current max,
    # so posts the realtime feed skipped still get scanned.
    self.max_ids_modify_lock.acquire()
    if site in self.previous_max_ids and max(new_post_ids) > self.previous_max_ids[site]:
        previous_max_id = self.previous_max_ids[site]
        intermediate_posts = range(previous_max_id + 1, max(new_post_ids))
        # We don't want to go over the 100-post API cutoff, so take the last
        # (100-len(new_post_ids)) from intermediate_posts
        intermediate_posts = intermediate_posts[(100 - len(new_post_ids)):]
        # new_post_ids could contain edited posts, so merge it back in
        combined = chain(intermediate_posts, new_post_ids)
        # Could be duplicates, so uniquify
        posts = list(set(combined))
    else:
        posts = new_post_ids
    try:
        if max(new_post_ids) > self.previous_max_ids[site]:
            self.previous_max_ids[site] = max(new_post_ids)
            store_bodyfetcher_max_ids()
    except KeyError:
        # First batch ever seen for this site.
        self.previous_max_ids[site] = max(new_post_ids)
        store_bodyfetcher_max_ids()
    self.max_ids_modify_lock.release()
    log('debug', "New IDs / Hybrid Intermediate IDs for {}:".format(site))
    if len(new_post_ids) > 30:
        log('debug', "{} +{} more".format(sorted(new_post_ids)[:30], len(new_post_ids) - 30))
    else:
        log('debug', sorted(new_post_ids))
    if len(new_post_ids) == len(posts):
        log('debug', "[ *Identical* ]")
    elif len(posts) > 30:
        log('debug', "{} +{} more".format(sorted(posts)[:30], len(posts) - 30))
    else:
        log('debug', sorted(posts))
    question_modifier = ""
    pagesize_modifier = ""
    if site == "stackoverflow.com":
        # Not all SO questions are shown in the realtime feed. We now
        # fetch all recently modified SO questions to work around that.
        if self.last_activity_date != 0:
            pagesize = "50"
        else:
            pagesize = "25"
        pagesize_modifier = "&pagesize={pagesize}" \
                            "&min={time_length}".format(pagesize=pagesize, time_length=str(self.last_activity_date))
    else:
        question_modifier = "/{0}".format(";".join([str(post) for post in posts]))
    url = "https://api.stackexchange.com/2.2/questions{q_modifier}?site={site}" \
          "&filter=!*xq08dCDNr)PlxxXfaN8ntivx(BPlY_8XASyXLX-J7F-)VK*Q3KTJVkvp*&key=IAkbitmze4B8KpacUfLqkw((" \
          "{optional_min_query_param}".format(q_modifier=question_modifier, site=site,
                                             optional_min_query_param=pagesize_modifier)
    # wait to make sure API has/updates post data
    time.sleep(3)
    GlobalVars.api_request_lock.acquire()
    # Respect backoff, if we were given one
    if GlobalVars.api_backoff_time > time.time():
        time.sleep(GlobalVars.api_backoff_time - time.time() + 2)
    try:
        time_request_made = datetime.now().strftime('%H:%M:%S')
        response = requests.get(url, timeout=20).json()
    except (requests.exceptions.Timeout, requests.ConnectionError, Exception):
        # Any failure in the request being made (timeout or otherwise) should be added back to
        # the queue.
        self.queue_modify_lock.acquire()
        if site in self.queue:
            self.queue[site].update(new_posts)
        else:
            self.queue[site] = new_posts
        self.queue_modify_lock.release()
        GlobalVars.api_request_lock.release()
        return
    self.api_data_lock.acquire()
    add_or_update_api_data(site)
    self.api_data_lock.release()
    message_hq = ""
    if "quota_remaining" in response:
        # A jump of >= 5000 remaining requests means the daily quota rolled
        # over; report per-site usage and reset the counters.
        if response["quota_remaining"] - GlobalVars.apiquota >= 5000 and GlobalVars.apiquota >= 0:
            tell_rooms_with("debug", "API quota rolled over with {0} requests remaining. "
                                     "Current quota: {1}.".format(GlobalVars.apiquota,
                                                                  response["quota_remaining"]))
            sorted_calls_per_site = sorted(GlobalVars.api_calls_per_site.items(), key=itemgetter(1), reverse=True)
            api_quota_used_per_site = ""
            for site_name, quota_used in sorted_calls_per_site:
                sanatized_site_name = site_name.replace('.com', '').replace('.stackexchange', '')
                api_quota_used_per_site += sanatized_site_name + ": {0}\n".format(str(quota_used))
            api_quota_used_per_site = api_quota_used_per_site.strip()
            tell_rooms_with("debug", api_quota_used_per_site)
            clear_api_data()
        if response["quota_remaining"] == 0:
            tell_rooms_with("debug", "API reports no quota left! May be a glitch.")
            tell_rooms_with("debug", str(response))  # No code format for now?
        if GlobalVars.apiquota == -1:
            # -1 is the startup sentinel: first successful call after restart.
            tell_rooms_with("debug", "Restart: API quota is {quota}."
                                     .format(quota=response["quota_remaining"]))
        GlobalVars.apiquota = response["quota_remaining"]
    else:
        message_hq = "The quota_remaining property was not in the API response."
    if "error_message" in response:
        message_hq += " Error: {} at {} UTC.".format(response["error_message"], time_request_made)
        if "error_id" in response and response["error_id"] == 502:
            if GlobalVars.api_backoff_time < time.time() + 12:  # Add a backoff of 10 + 2 seconds as a default
                GlobalVars.api_backoff_time = time.time() + 12
            message_hq += " Backing off on requests for the next 12 seconds."
        message_hq += " Previous URL: `{}`".format(url)
    if "backoff" in response:
        # Honor an explicit backoff from the API if it extends the current one.
        if GlobalVars.api_backoff_time < time.time() + response["backoff"]:
            GlobalVars.api_backoff_time = time.time() + response["backoff"]
    GlobalVars.api_request_lock.release()
    if len(message_hq) > 0:
        tell_rooms_with("debug", message_hq.strip())
    if "items" not in response:
        return
    if site == "stackoverflow.com":
        # Remember the newest activity timestamp for the next min= query.
        items = response["items"]
        if len(items) > 0 and "last_activity_date" in items[0]:
            self.last_activity_date = items[0]["last_activity_date"]
    num_scanned = 0
    start_time = time.time()
    for post in response["items"]:
        # Trimmed copy of the post for Flovis logging (body/answers elided).
        pnb = copy.deepcopy(post)
        if 'body' in pnb:
            pnb['body'] = 'Present, but truncated'
        if 'answers' in pnb:
            del pnb['answers']
        if "title" not in post or "body" not in post:
            if GlobalVars.flovis is not None and 'question_id' in post:
                GlobalVars.flovis.stage('bodyfetcher/api_response/no_content', site, post['question_id'], pnb)
            continue
        post['site'] = site
        try:
            post['edited'] = (post['creation_date'] != post['last_edit_date'])
        except KeyError:
            post['edited'] = False  # last_edit_date not present = not edited
        try:
            post_ = Post(api_response=post)
        except PostParseError as err:
            # NOTE(review): `post_` may be unbound here if the very first
            # parse fails — the repr in this log line would then NameError.
            log('error', 'Error {0} when parsing post: {1!r}'.format(err, post_))
            if GlobalVars.flovis is not None and 'question_id' in post:
                GlobalVars.flovis.stage('bodyfetcher/api_response/error', site, post['question_id'], pnb)
            continue
        num_scanned += 1
        is_spam, reason, why = check_if_spam(post_)
        if is_spam:
            try:
                if GlobalVars.flovis is not None and 'question_id' in post:
                    GlobalVars.flovis.stage('bodyfetcher/api_response/spam', site, post['question_id'],
                                            {'post': pnb, 'check_if_spam': [is_spam, reason, why]})
                handle_spam(post=post_, reasons=reason, why=why)
            except Exception as e:
                log('error', "Exception in handle_spam:", e)
        elif GlobalVars.flovis is not None and 'question_id' in post:
            GlobalVars.flovis.stage('bodyfetcher/api_response/not_spam', site, post['question_id'],
                                    {'post': pnb, 'check_if_spam': [is_spam, reason, why]})
        try:
            if "answers" not in post:
                pass
            else:
                for answer in post["answers"]:
                    anb = copy.deepcopy(answer)
                    if 'body' in anb:
                        anb['body'] = 'Present, but truncated'
                    num_scanned += 1
                    answer["IsAnswer"] = True  # Necesssary for Post object
                    answer["title"] = ""  # Necessary for proper Post object creation
                    answer["site"] = site  # Necessary for proper Post object creation
                    try:
                        answer['edited'] = (answer['creation_date'] != answer['last_edit_date'])
                    except KeyError:
                        answer['edited'] = False  # last_edit_date not present = not edited
                    answer_ = Post(api_response=answer, parent=post_)
                    is_spam, reason, why = check_if_spam(answer_)
                    if is_spam:
                        try:
                            if GlobalVars.flovis is not None and 'answer_id' in answer:
                                GlobalVars.flovis.stage('bodyfetcher/api_response/spam', site, answer['answer_id'],
                                                        {'post': anb, 'check_if_spam': [is_spam, reason, why]})
                            handle_spam(answer_, reasons=reason, why=why)
                        except Exception as e:
                            log('error', "Exception in handle_spam:", e)
                    elif GlobalVars.flovis is not None and 'answer_id' in answer:
                        GlobalVars.flovis.stage('bodyfetcher/api_response/not_spam', site, answer['answer_id'],
                                                {'post': anb, 'check_if_spam': [is_spam, reason, why]})
        except Exception as e:
            log('error', "Exception handling answers:", e)
    end_time = time.time()
    # Fold this batch into the global scan statistics under their lock.
    GlobalVars.posts_scan_stats_lock.acquire()
    GlobalVars.num_posts_scanned += num_scanned
    GlobalVars.post_scan_time += end_time - start_time
    GlobalVars.posts_scan_stats_lock.release()
    return
def make_api_call_for_site(self, site):
    """Fetch the queued question IDs for ``site`` from the SE API and scan them.

    Pops the site's queued post IDs (persisting the remaining queue), builds
    the ``/2.2/questions`` request -- for stackoverflow.com a recent-activity
    query is used instead, because not every SO question reaches the realtime
    feed -- performs quota/backoff bookkeeping, then runs every returned
    question and each of its answers through check_if_spam()/handle_spam().

    Returns None.  On any request failure the popped IDs are pushed back onto
    the queue for a later retry.
    """
    if site not in self.queue:
        return

    # Take this site's queued IDs and persist the remaining queue to disk.
    self.queue_modify_lock.acquire()
    posts = self.queue.pop(site)
    store_bodyfetcher_queue()
    self.queue_modify_lock.release()

    question_modifier = ""
    pagesize_modifier = ""
    if site == "stackoverflow.com":
        # Not all SO questions are shown in the realtime feed. We now
        # fetch all recently modified SO questions to work around that.
        pagesize = "50" if self.last_activity_date != 0 else "25"
        pagesize_modifier = "&pagesize={pagesize}" \
                            "&min={time_length}".format(pagesize=pagesize,
                                                        time_length=str(self.last_activity_date))
    else:
        question_modifier = "/{0}".format(";".join(str(post) for post in posts))

    url = "https://api.stackexchange.com/2.2/questions{q_modifier}?site={site}" \
          "&filter=!)E0g*ODaEZ(SgULQhYvCYbu09*ss(bKFdnTrGmGUxnqPptuHP&key=IAkbitmze4B8KpacUfLqkw((" \
          "{optional_min_query_param}".format(q_modifier=question_modifier, site=site,
                                              optional_min_query_param=pagesize_modifier)

    # wait to make sure API has/updates post data
    time.sleep(3)

    GlobalVars.api_request_lock.acquire()
    # Respect backoff, if we were given one
    if GlobalVars.api_backoff_time > time.time():
        time.sleep(GlobalVars.api_backoff_time - time.time() + 2)
    try:
        time_request_made = datetime.now().strftime('%H:%M:%S')
        response = requests.get(url, timeout=20).json()
    except Exception:
        # Any failure in the request being made (timeout or otherwise) should
        # be added back to the queue.  NOTE: the old clause caught
        # (Timeout, ConnectionError, Exception) -- Exception already subsumes
        # the other two, so just catch Exception.
        self.queue_modify_lock.acquire()
        if site in self.queue:
            self.queue[site].extend(posts)
        else:
            self.queue[site] = posts
        self.queue_modify_lock.release()
        # BUG FIX: this early return previously left api_request_lock held,
        # deadlocking every subsequent API call after one failed request.
        GlobalVars.api_request_lock.release()
        return

    self.api_data_lock.acquire()
    add_or_update_api_data(site)
    self.api_data_lock.release()

    message_hq = ""
    if "quota_remaining" in response:
        # A jump of >= 5000 remaining requests means the daily quota rolled
        # over; report per-site API usage and reset the counters.
        if response["quota_remaining"] - GlobalVars.apiquota >= 5000 and GlobalVars.apiquota >= 0:
            GlobalVars.charcoal_hq.send_message("API quota rolled over with {0} requests remaining. "
                                                "Current quota: {1}.".format(GlobalVars.apiquota,
                                                                             response["quota_remaining"]))
            sorted_calls_per_site = sorted(GlobalVars.api_calls_per_site.items(), key=itemgetter(1),
                                           reverse=True)
            api_quota_used_per_site = ""
            for site_name, quota_used in sorted_calls_per_site:
                sanitized_site_name = site_name.replace('.com', '').replace('.stackexchange', '')
                api_quota_used_per_site += sanitized_site_name + ": {0}\n".format(str(quota_used))
            api_quota_used_per_site = api_quota_used_per_site.strip()
            GlobalVars.charcoal_hq.send_message(api_quota_used_per_site, False)
            clear_api_data()
        if response["quota_remaining"] == 0:
            GlobalVars.charcoal_hq.send_message("API reports no quota left! May be a glitch.")
            GlobalVars.charcoal_hq.send_message(str(response))  # No code format for now?
        if GlobalVars.apiquota == -1:
            # -1 is the sentinel for "just restarted, quota unknown".
            GlobalVars.charcoal_hq.send_message("Restart: API quota is {quota}."
                                                .format(quota=response["quota_remaining"]))
        GlobalVars.apiquota = response["quota_remaining"]
    else:
        message_hq = "The quota_remaining property was not in the API response."

    if "error_message" in response:
        message_hq += " Error: {} at {} UTC.".format(response["error_message"], time_request_made)

    if "error_id" in response and response["error_id"] == 502:
        if GlobalVars.api_backoff_time < time.time() + 12:  # Add a backoff of 10 + 2 seconds as a default
            GlobalVars.api_backoff_time = time.time() + 12
        message_hq += " Backing off on requests for the next 12 seconds."

    if "backoff" in response:
        if GlobalVars.api_backoff_time < time.time() + response["backoff"]:
            GlobalVars.api_backoff_time = time.time() + response["backoff"]

    GlobalVars.api_request_lock.release()

    if len(message_hq) > 0:
        GlobalVars.charcoal_hq.send_message(message_hq.strip())

    if "items" not in response:
        return

    if site == "stackoverflow.com":
        # Remember the newest activity timestamp so the next SO fetch can use
        # it as the &min= lower bound.
        items = response["items"]
        if len(items) > 0 and "last_activity_date" in items[0]:
            self.last_activity_date = items[0]["last_activity_date"]

    num_scanned = 0
    start_time = time.time()

    for post in response["items"]:
        if "title" not in post or "body" not in post:
            continue
        num_scanned += 1
        title = GlobalVars.parser.unescape(post["title"])
        body = GlobalVars.parser.unescape(post["body"])
        link = post["link"]
        post_score = post["score"]
        up_vote_count = post["up_vote_count"]
        down_vote_count = post["down_vote_count"]
        try:
            owner_name = GlobalVars.parser.unescape(post["owner"]["display_name"])
            owner_link = post["owner"]["link"]
            owner_rep = post["owner"]["reputation"]
        except (KeyError, TypeError):
            # Deleted/anonymous owners may be missing some of these fields.
            owner_name = ""
            owner_link = ""
            owner_rep = 0
        q_id = str(post["question_id"])

        is_spam, reason, why = check_if_spam(title=title, body=body, user_name=owner_name,
                                             user_url=owner_link, post_site=site, post_id=q_id,
                                             is_answer=False, body_is_summary=False,
                                             owner_rep=owner_rep, post_score=post_score)
        if is_spam:
            try:
                handle_spam(title=title, body=body, poster=owner_name, site=site,
                            post_url=link, poster_url=owner_link, post_id=q_id,
                            reasons=reason, is_answer=False, why=why, owner_rep=owner_rep,
                            post_score=post_score, up_vote_count=up_vote_count,
                            down_vote_count=down_vote_count, question_id=None)
            except Exception:
                print("NOP")

        try:
            for answer in post["answers"]:
                num_scanned += 1
                answer_title = ""
                body = answer["body"]
                # NOTE(review): at this point owner_name still refers to the
                # question's (or the previous answer's) owner -- debug leftover.
                print("got answer from owner with name " + owner_name)
                link = answer["link"]
                a_id = str(answer["answer_id"])
                post_score = answer["score"]
                up_vote_count = answer["up_vote_count"]
                down_vote_count = answer["down_vote_count"]
                try:
                    owner_name = GlobalVars.parser.unescape(answer["owner"]["display_name"])
                    owner_link = answer["owner"]["link"]
                    owner_rep = answer["owner"]["reputation"]
                except (KeyError, TypeError):
                    owner_name = ""
                    owner_link = ""
                    owner_rep = 0

                is_spam, reason, why = check_if_spam(title=answer_title, body=body,
                                                     user_name=owner_name, user_url=owner_link,
                                                     post_site=site, post_id=a_id, is_answer=True,
                                                     body_is_summary=False, owner_rep=owner_rep,
                                                     post_score=post_score)
                if is_spam:
                    try:
                        # The parent question's title is reported for answers.
                        handle_spam(title=title, body=body, poster=owner_name, site=site,
                                    post_url=link, poster_url=owner_link, post_id=a_id,
                                    reasons=reason, is_answer=True, why=why, owner_rep=owner_rep,
                                    post_score=post_score, up_vote_count=up_vote_count,
                                    down_vote_count=down_vote_count, question_id=q_id)
                    except Exception:
                        print("NOP")
        except Exception:
            # Questions without an "answers" key land here.
            print("no answers")

    end_time = time.time()
    GlobalVars.posts_scan_stats_lock.acquire()
    GlobalVars.num_posts_scanned += num_scanned
    GlobalVars.post_scan_time += end_time - start_time
    GlobalVars.posts_scan_stats_lock.release()
    return
def make_api_call_for_site(self, site):
    """Fetch the queued question IDs for ``site`` from the SE API and scan them.

    Older positional-argument variant: pops the site's queue, queries
    ``/2.2/questions`` (a recent-activity query for stackoverflow.com, since
    not all SO questions appear in the realtime feed), handles quota/backoff
    reporting, then feeds every question and answer to
    check_if_spam()/handle_spam().

    Returns None.  On timeout the popped IDs are currently dropped.
    """
    posts = self.queue.pop(site)

    # Persist the remaining queue to disk.
    self.queue_store_lock.acquire()
    store_bodyfetcher_queue()
    self.queue_store_lock.release()

    if site == "stackoverflow.com":
        # Not all SO questions are shown in the realtime feed. We now
        # fetch all recently modified SO questions to work around that.
        min_query = ""
        if self.last_activity_date != 0:
            min_query = "&min=" + str(self.last_activity_date)
            pagesize = "50"
        else:
            pagesize = "25"
        url = "http://api.stackexchange.com/2.2/questions?site=stackoverflow&filter=!)E0g*ODaEZ(SgULQhYvCYbu09*ss(bKFdnTrGmGUxnqPptuHP&key=IAkbitmze4B8KpacUfLqkw((&pagesize=" + pagesize + min_query
    else:
        url = "http://api.stackexchange.com/2.2/questions/" + ";".join(str(x) for x in posts) + "?site=" + site + "&filter=!)E0g*ODaEZ(SgULQhYvCYbu09*ss(bKFdnTrGmGUxnqPptuHP&key=IAkbitmze4B8KpacUfLqkw(("

    # wait to make sure API has/updates post data
    time.sleep(3)
    # Respect backoff, if we were given one
    if GlobalVars.api_backoff_time > time.time():
        time.sleep(GlobalVars.api_backoff_time - time.time() + 2)
    try:
        response = requests.get(url, timeout=20).json()
    except requests.exceptions.Timeout:
        # TODO(review): the popped `posts` are lost here; consider re-queueing
        # them as the newer variant of this method does.
        return  # could add some retrying logic here, but eh.

    self.api_data_lock.acquire()
    add_or_update_api_data(site)
    self.api_data_lock.release()

    message_hq = ""
    if "quota_remaining" in response:
        # A jump of >= 1000 remaining requests means the daily quota rolled
        # over; report per-site usage and reset the counters.
        if response["quota_remaining"] - GlobalVars.apiquota >= 1000 and GlobalVars.apiquota >= 0:
            GlobalVars.charcoal_hq.send_message("API quota rolled over with {} requests remaining.".format(GlobalVars.apiquota))
            sorted_calls_per_site = sorted(GlobalVars.api_calls_per_site.items(), key=itemgetter(1), reverse=True)
            api_quota_used_per_site = ""
            # BUG FIX: this loop variable used to be named `site`, clobbering
            # the `site` parameter used below (the stackoverflow.com check and
            # every check_if_spam call saw the wrong site after a rollover).
            for site_name, quota_used in sorted_calls_per_site:
                api_quota_used_per_site = api_quota_used_per_site + site_name.replace('.com', '').replace('.stackexchange', '') + ": " + str(quota_used) + "\n"
            api_quota_used_per_site = api_quota_used_per_site.strip()
            GlobalVars.charcoal_hq.send_message(api_quota_used_per_site, False)
            clear_api_data()
        if response["quota_remaining"] == 0:
            GlobalVars.charcoal_hq.send_message("API reports no quota left! May be a glitch.")
            GlobalVars.charcoal_hq.send_message(str(response))  # No code format for now?
        if GlobalVars.apiquota == -1:
            # -1 is the sentinel for "just restarted, quota unknown".
            GlobalVars.charcoal_hq.send_message("Restart: API quota is {}.".format(response["quota_remaining"]))
        GlobalVars.apiquota = response["quota_remaining"]
    else:
        message_hq = "The quota_remaining property was not in the API response."

    if "error_message" in response:
        message_hq = message_hq + " Error: {}.".format(response["error_message"])

    if "backoff" in response:
        if GlobalVars.api_backoff_time < time.time() + response["backoff"]:
            GlobalVars.api_backoff_time = time.time() + response["backoff"]
        message_hq = message_hq + "\n" + "Backoff received of " + str(response["backoff"]) + " seconds."

    if len(message_hq) > 0:
        GlobalVars.charcoal_hq.send_message(message_hq.strip())

    if "items" not in response:
        return

    if site == "stackoverflow.com":
        # Remember the newest activity timestamp for the next &min= query.
        if len(response["items"]) > 0 and "last_activity_date" in response["items"][0]:
            self.last_activity_date = response["items"][0]["last_activity_date"]

    for post in response["items"]:
        if "title" not in post or "body" not in post:
            continue
        title = GlobalVars.parser.unescape(post["title"])
        body = GlobalVars.parser.unescape(post["body"])
        link = post["link"]
        post_score = post["score"]
        up_vote_count = post["up_vote_count"]
        down_vote_count = post["down_vote_count"]
        try:
            owner_name = GlobalVars.parser.unescape(post["owner"]["display_name"])
            owner_link = post["owner"]["link"]
            owner_rep = post["owner"]["reputation"]
        except (KeyError, TypeError):
            # Deleted/anonymous owners may be missing some of these fields.
            owner_name = ""
            owner_link = ""
            owner_rep = 0
        q_id = str(post["question_id"])

        is_spam, reason, why = check_if_spam(title, body, owner_name, owner_link, site,
                                             q_id, False, False, owner_rep, post_score)
        if is_spam:
            try:
                handle_spam(title, body, owner_name, site, link, owner_link, q_id, reason,
                            False, why, owner_rep, post_score, up_vote_count, down_vote_count)
            except Exception:
                print("NOP")

        try:
            for answer in post["answers"]:
                answer_title = ""
                body = answer["body"]
                # NOTE(review): owner_name here is still the question's (or the
                # previous answer's) owner -- debug leftover.
                print("got answer from owner with name " + owner_name)
                link = answer["link"]
                a_id = str(answer["answer_id"])
                post_score = answer["score"]
                up_vote_count = answer["up_vote_count"]
                down_vote_count = answer["down_vote_count"]
                try:
                    owner_name = GlobalVars.parser.unescape(answer["owner"]["display_name"])
                    owner_link = answer["owner"]["link"]
                    owner_rep = answer["owner"]["reputation"]
                except (KeyError, TypeError):
                    owner_name = ""
                    owner_link = ""
                    owner_rep = 0

                is_spam, reason, why = check_if_spam(answer_title, body, owner_name, owner_link,
                                                     site, a_id, True, False, owner_rep, post_score)
                if is_spam:
                    try:
                        # The parent question's title is reported for answers.
                        handle_spam(title, body, owner_name, site, link, owner_link, a_id, reason,
                                    True, why, owner_rep, post_score, up_vote_count, down_vote_count)
                    except Exception:
                        print("NOP")
        except Exception:
            # Questions without an "answers" key land here.
            print("no answers")
    return
def handle_websocket_data(data):
    """Dispatch one metasmoke websocket payload.

    ``data["message"]`` is expected to be a dict-like payload keyed by action:
    "message" (relay to chat), "blacklist" (blacklist a user), "naa"/"fp"
    (record feedback for a post), "report" (manually report a post), or
    "commit_status" (CI result; may pull and restart the bot).  Payloads
    without a "message" key are ignored.
    """
    if "message" not in data:
        return
    message = data["message"]
    if isinstance(message, Iterable):
        if "message" in message:
            # Plain chat relay.
            GlobalVars.charcoal_hq.send_message(message["message"])
        elif "blacklist" in message:
            datahandling.add_blacklisted_user((message["blacklist"]["uid"], message["blacklist"]["site"]),
                                              "metasmoke", message["blacklist"]["post"])
        elif "naa" in message:
            post_site_id = parsing.fetch_post_id_and_site_from_url(message["naa"]["post_link"])
            datahandling.add_ignored_post(post_site_id[0:2])
        elif "fp" in message:
            post_site_id = parsing.fetch_post_id_and_site_from_url(message["fp"]["post_link"])
            datahandling.add_false_positive(post_site_id[0:2])
        elif "report" in message:
            post_data = apigetpost.api_get_post(message["report"]["post_link"])
            if post_data is None or post_data is False:
                return
            # Skip if already reported, unless it was previously marked fp.
            if datahandling.has_already_been_posted(post_data.site, post_data.post_id, post_data.title) \
                    and not datahandling.is_false_positive((post_data.post_id, post_data.site)):
                return
            user = parsing.get_user_from_url(post_data.owner_url)
            if user is not None:
                datahandling.add_blacklisted_user(user, "metasmoke", post_data.post_url)
            why = u"Post manually reported by user *{}* from metasmoke.\n".format(message["report"]["user"])
            spamhandling.handle_spam(title=post_data.title, body=post_data.body,
                                     poster=post_data.owner_name, site=post_data.site,
                                     post_url=post_data.post_url, poster_url=post_data.owner_url,
                                     post_id=post_data.post_id,
                                     reasons=["Manually reported " + post_data.post_type],
                                     is_answer=post_data.post_type == "answer", why=why,
                                     owner_rep=post_data.owner_rep, post_score=post_data.score,
                                     up_vote_count=post_data.up_vote_count,
                                     down_vote_count=post_data.down_vote_count,
                                     question_id=post_data.question_id)
        elif "commit_status" in message:
            c = message["commit_status"]
            sha = c["commit_sha"][:7]
            # Only react to commits we are not already running.
            if c["commit_sha"] != os.popen('git log --pretty=format:"%H" -n 1').read():
                # BUG FIX: `s` was previously unbound when the status was
                # neither "success" nor "failure", raising UnboundLocalError
                # at the final send_message call.
                s = None
                if c["status"] == "success":
                    if "autopull" in c["commit_message"]:
                        s = "[CI]({ci_link}) on {commit_sha} succeeded. Message contains 'autopull', pulling...".format(ci_link=c["ci_url"], commit_sha=sha)
                        GlobalVars.charcoal_hq.send_message(s)
                        time.sleep(2)
                        # Exit code 3 signals the supervisor to pull & restart.
                        os._exit(3)
                    else:
                        s = "[CI]({ci_link}) on {commit_sha} succeeded.".format(ci_link=c["ci_url"], commit_sha=sha)
                elif c["status"] == "failure":
                    s = "[CI]({ci_link}) on {commit_sha} failed.".format(ci_link=c["ci_url"], commit_sha=sha)
                if s is not None:
                    GlobalVars.charcoal_hq.send_message(s)