def __init__(self):
    """Create the database handle, the Reddit client and the comment-stream source."""
    # Crossmod database interface
    self.db = CrossmodDB()

    # Credentials for the PRAW client that watches the subreddits
    credentials = {
        "user_agent": CrossmodConsts.REDDIT_USER_AGENT,
        "client_id": CrossmodConsts.REDDIT_CLIENT_ID,
        "client_secret": CrossmodConsts.REDDIT_CLIENT_SECRET,
        "username": CrossmodConsts.REDDIT_USERNAME,
        "password": CrossmodConsts.REDDIT_PASSWORD,
    }
    self.reddit = praw.Reddit(**credentials)

    # Look up every active subreddit together with its perform_action flag
    # (the flag decides whether moderation actions are carried out or only
    # simulated for that subreddit)
    active_rows = self.db.database_session.query(ActiveSubredditsTable).all()
    self.perform_action_in_subreddit = dict(
        (entry.subreddit, entry.perform_action) for entry in active_rows)

    # A second query fetches just the names; joined with "+" they form the
    # multi-subreddit handle from which comments are streamed
    name_rows = self.db.database_session.query(
        ActiveSubredditsTable.subreddit).all()
    subreddit_names = [entry.subreddit for entry in name_rows]
    self.subreddits_listener = self.reddit.subreddit(
        "+".join(subreddit_names))

    # The account this client is logged in as
    self.me = self.reddit.user.me()
def __init__(self):
    """Set the body markers, open the database session and log in to Reddit."""
    # Marker strings stored on the instance
    self.REMOVED, self.DELETED = "[removed]", "[deleted]"

    # Database interface and a shortcut to its session
    self.db = CrossmodDB()
    self.session = self.db.database_session

    # Reddit bot client, using the updater's own client id / secret
    bot_credentials = dict(
        user_agent=CrossmodConsts.REDDIT_USER_AGENT,
        client_id=CrossmodConsts.UPDATER_REDDIT_CLIENT_ID,
        client_secret=CrossmodConsts.UPDATER_REDDIT_CLIENT_SECRET,
        username=CrossmodConsts.REDDIT_USERNAME,
        password=CrossmodConsts.REDDIT_PASSWORD,
    )
    self.reddit = praw.Reddit(**bot_credentials)
def current_overall_stats(
        crossmod_agreement_score=CrossmodConsts.AGREEMENT_SCORE_THRESHOLD):
    """Return aggregate comment counts computed over DataTable.

    Args:
        crossmod_agreement_score: Agreement-score threshold; a row counts as
            flagged by Crossmod when its agreement_score is >= this value.

    Returns:
        dict with the keys 'total_comments', 'automoderator', 'moderators',
        'crossmod_agreement_score', 'automoderator_and_crossmod',
        'moderators_and_crossmod' and 'only_crossmod', each mapped to a row
        count.
    """
    db = CrossmodDB()
    session = db.database_session

    def count_rows(*criteria):
        """Count the DataTable rows matching every given criterion."""
        return session.query(DataTable).filter(*criteria).count()

    # Reusable filter criteria.  The `== None` / `!= None` comparisons are
    # SQLAlchemy column expressions (rendered as IS NULL / IS NOT NULL), so
    # they must not be rewritten with `is`.
    flagged = DataTable.agreement_score >= crossmod_agreement_score
    by_automoderator = DataTable.banned_by == 'AutoModerator'
    by_other_moderator = (DataTable.banned_by != 'AutoModerator',
                          DataTable.banned_by != None)  # noqa: E711
    not_banned = DataTable.banned_by == None  # noqa: E711

    crossmod_state = {}
    crossmod_state['total_comments'] = session.query(
        func.count(DataTable.id)).scalar()
    # COUNT(<expr>) counts the rows where <expr> is non-NULL, so counting the
    # comparison itself would include every row with any banned_by value.
    # Filter on the comparison instead.
    crossmod_state['automoderator'] = count_rows(by_automoderator)
    crossmod_state['moderators'] = count_rows(*by_other_moderator)
    crossmod_state['crossmod_agreement_score'] = count_rows(flagged)
    # The three breakdowns use the same `>=` test as the count above so that
    # rows sitting exactly on the threshold are not dropped from them.
    crossmod_state['automoderator_and_crossmod'] = count_rows(
        flagged, by_automoderator)
    crossmod_state['moderators_and_crossmod'] = count_rows(
        flagged, *by_other_moderator)
    crossmod_state['only_crossmod'] = count_rows(flagged, not_banned)
    return crossmod_state
def main():
    """Register a fixed set of subreddits through add_subreddit_to_monitor."""
    db = CrossmodDB()
    # (subreddit name, boolean flag) pairs; the flag is forwarded as the third
    # argument -- presumably the perform_action setting, confirm against
    # add_subreddit_to_monitor.
    subreddits = [
        ("modbot_staging", True),
        ("Futurology", False),
        ("Coronavirus", False),
        ("China_Flu", False),
    ]
    for name, flag in subreddits:
        add_subreddit_to_monitor(db, name, flag)
def __init__(self):
    """Create the database handle and the PRAW client, and record how many
    subreddits are currently configured."""
    # Crossmod database interface
    self.db = CrossmodDB()

    # PRAW client used to monitor subreddits (monitor-specific client id/secret)
    monitor_credentials = dict(
        user_agent=CrossmodConsts.REDDIT_USER_AGENT,
        client_id=CrossmodConsts.MONITOR_REDDIT_CLIENT_ID,
        client_secret=CrossmodConsts.MONITOR_REDDIT_CLIENT_SECRET,
        username=CrossmodConsts.REDDIT_USERNAME,
        password=CrossmodConsts.REDDIT_PASSWORD,
    )
    self.reddit = praw.Reddit(**monitor_credentials)

    # The account this client is logged in as
    self.me = self.reddit.user.me()

    # Number of subreddits being monitored right now.  When this changes
    # during monitor(), monitor() is called again so that the subreddit list
    # is refreshed from the db.
    subreddit_total = self.number_of_subreddits()
    self.current_subreddits_count = subreddit_total
def main():
    """Register two subreddits, each with its list of account names, through
    add_subreddit_to_monitor."""
    db = CrossmodDB()

    staging_accounts = ["thebiglebowskiii"]
    futurology_accounts = [
        "thebiglebowskiii",
        "AutoModerator",
        "TransPlanetInjection",
        "Xenophon1",
        "ion-tom",
        "mind_bomber",
        "Gobi_The_Mansoe",
        "multi-mod",
        "Buck-Nasty",
        "Yosarian2",
        "ImLivingAmongYou",
        "lughnasadh",
    ]

    # Only the staging subreddit passes the trailing True; the second call
    # relies on whatever default add_subreddit_to_monitor defines.
    add_subreddit_to_monitor(db, "modbot_staging", staging_accounts, True)
    add_subreddit_to_monitor(db, "Futurology", futurology_accounts)
class CrossmodSubredditMonitor():
    """Provides an interface to monitor multiple subreddits by querying
    Crossmod's API"""

    def __init__(self):
        """Initializes the db object, the PRAW object and the variables used to
        track which subreddits to monitor"""
        # Crossmod database interface
        self.db = CrossmodDB()

        # PRAW interface to monitor subreddits
        self.reddit = praw.Reddit(
            user_agent=CrossmodConsts.REDDIT_USER_AGENT,
            client_id=CrossmodConsts.MONITOR_REDDIT_CLIENT_ID,
            client_secret=CrossmodConsts.MONITOR_REDDIT_CLIENT_SECRET,
            username=CrossmodConsts.REDDIT_USERNAME,
            password=CrossmodConsts.REDDIT_PASSWORD)

        # Who am I?
        self.me = self.reddit.user.me()

        # Keeps track of how many subreddits are currently being monitored
        # (If this changes during monitor(), monitor() will be called again to
        # refresh the subreddit list from the db)
        self.current_subreddits_count = self.number_of_subreddits()

    def number_of_subreddits(self):
        """Returns the count of subreddit entries in SubredditSettingsTable."""
        return self.db.database_session.query(
            func.count(SubredditSettingsTable.subreddit)).scalar()

    def should_perform_action(self, subreddit):
        """Queries database to check whether active moderation is required.

        Returns the perform_action value stored for `subreddit`. `.one()`
        raises unless exactly one settings row matches.
        """
        row = self.db.database_session.query(
            SubredditSettingsTable.perform_action).filter(
                SubredditSettingsTable.subreddit == subreddit).one()
        return row.perform_action

    def find_removal_consensus(self, comment, subreddit_name):
        """Finds removal consensus querying Crossmod's API.

        Args:
            comment: Comment text to score; sent as a one-element list.
            subreddit_name: Subreddit whose settings row supplies the
                comma-separated classifier lists for the request.

        Returns:
            The single element of the JSON list returned by the API.

        Raises:
            ValueError: If the response is not a list with exactly one item.
        """
        subreddit_settings = self.db.database_session \
            .query(SubredditSettingsTable) \
            .filter(SubredditSettingsTable.subreddit == subreddit_name) \
            .one()

        data = {
            "comments": [comment],
            "subreddit_list":
            subreddit_settings.subreddit_classifiers.split(','),
            "macro_norm_list": subreddit_settings.norm_classifiers.split(','),
            "key": CrossmodConsts.CLIENT_API_SUPER_KEY
        }
        result = requests.post(url=CrossmodConsts.CLIENT_API_ENDPOINT,
                               json=data).json()

        if not isinstance(result, list) or len(result) != 1:
            raise ValueError(
                f"Expected API response to be a list with a single comment, but got: {result}"
            )
        return result[0]

    def is_whitelisted(self, author, subreddit):
        """Checks whether the author provided is a moderator of the subreddit
        in which the comment was posted (the bot's own account also counts)."""
        moderator_list = [
            moderator.name
            for moderator in self.reddit.subreddit(subreddit).moderator()
        ]
        moderator_list.append(self.me)
        return author in moderator_list

    def perform_action(self, comment, action, agreement_score,
                       norm_violation_score):
        """Carries out the moderation `action` chosen for `comment`.

        Args:
            comment: The PRAW comment to act on.
            action: One of "EMPTY" (do nothing), "remove", "report" or
                "modmail"; any other value is ignored.
            agreement_score: Score quoted in the removal / report message.
            norm_violation_score: Score quoted in the removal / report
                message.
        """
        if action == "EMPTY":
            return

        # Use the comment's own subreddit object. Looking the subreddit up
        # again through `comment.subreddit.name` passes the fullname
        # ("t5_...") where a display name is expected.
        subreddit = comment.subreddit

        if action == "remove":
            print("Removing comment, and alerting moderator by modmail at:",
                  time.time())
            subreddit.modmail.create(
                "[Comment removal by Crossmod] Crossmod performed a comment removal!",
                f"Crossmod removed a comment with permalink [{comment.permalink}]",
                self.me)
            comment.mod.remove()
            message = f"[Comment removal by Crossmod] Comment removal consensus:\nAgreement Score {agreement_score}, Norm Violation Score {norm_violation_score}"
            comment.mod.send_removal_message(message,
                                             title='ignored',
                                             type='public')

        elif action == "report":
            print("Reporting a comment and sending it to report queue at:",
                  time.time())
            comment.report(
                f"Agreement Score: {agreement_score}/1.0, Norm Violation Score: {norm_violation_score}/1.0"
            )

        elif action == "modmail":
            print("Sending a modmail at:", time.time())
            subreddit.modmail.create(
                "[Alert by Crossmod] Comment exceeds removal consensus threshold!",
                f"A comment with permalink [{comment.permalink}] exceeded Crossmod's removal consensus threshold.",
                self.me)

    def check_restart_conditions(self):
        """Re-enters monitor() when the number of configured subreddits
        differs from the number recorded when monitoring started."""
        number_of_subreddits_now = self.number_of_subreddits()
        if self.current_subreddits_count != number_of_subreddits_now:
            print("\nSubreddit(s) added! Restarting subreddit monitor..\n")
            self.current_subreddits_count = number_of_subreddits_now
            # NOTE(review): this call is made from inside monitor()'s loop, so
            # every restart nests one more monitor() frame (and comment stream)
            # on top of the previous one.
            self.monitor()

    def _record_comment(self, comment, action, agreement_score,
                        norm_violation_score):
        """Writes one DataTable row describing how `comment` was handled."""
        # NOTE(review): comment.author is dereferenced unconditionally; confirm
        # it can never be None for streamed comments.
        self.db.write(DataTable,
                      created_utc=datetime.datetime.fromtimestamp(
                          comment.created_utc),
                      ingested_utc=datetime.datetime.now(),
                      id=comment.id,
                      body=comment.body,
                      crossmod_action=action,
                      author=comment.author.name,
                      subreddit=comment.subreddit.display_name,
                      banned_by=None,
                      banned_at_utc=None,
                      agreement_score=agreement_score,
                      norm_violation_score=norm_violation_score)

    def _subreddit_names(self, perform_action):
        """Returns "r/<name>" labels for subreddits with the given
        perform_action setting."""
        # `== perform_action` builds a SQL comparison on the column.
        rows = self.db.database_session.query(
            SubredditSettingsTable.subreddit).filter(
                SubredditSettingsTable.perform_action == perform_action).all()
        return [f"r/{row.subreddit}" for row in rows]

    @retry(wait=wait_exponential(multiplier=1, min=4, max=10))
    def monitor(self):
        """Streams new comments from every configured subreddit, scores each
        one through Crossmod's API, records it in DataTable and, where the
        subreddit is configured for it, performs the resulting action."""
        # Wait for subreddits to be added if there are none in the table
        while self.number_of_subreddits() == 0:
            time.sleep(1.0)

        # PRAW interface used to stream comments from subreddits
        self.current_subreddits_count = self.number_of_subreddits()
        subreddits_listener = self.reddit.subreddit("+".join([
            row.subreddit for row in self.db.database_session.query(
                SubredditSettingsTable.subreddit).all()
        ]))

        print("Crossmod started monitoring at:", (datetime.datetime.now(
            pytz.timezone('EST'))).strftime('%Y-%m-%d %H:%M:%S'), "EST")
        print("Currently moderating in :",
              ", ".join(self._subreddit_names(True)))
        print("Currently listening to:",
              ", ".join(self._subreddit_names(False)))
        print()

        for comment in subreddits_listener.stream.comments(skip_existing=True):
            print("______________________________________________\n")
            start = time.time()

            if comment is None or comment.body == '[removed]':
                continue

            subreddit_name = comment.subreddit.display_name

            print(f"Posted in r/{subreddit_name}:")
            print("Comment ID:", comment.id, "\nComment Body:",
                  comment.body.replace('\n', ' '))

            # Moderator / bot comments and comments caught by the filters are
            # recorded with sentinel scores and never sent to the API.
            if self.is_whitelisted(
                    comment.author,
                    subreddit_name) or CrossmodFilters.apply_filters(
                        comment.body):
                print("Filtering comment:", comment.id, comment.body)
                self._record_comment(comment, "filtered", -1.0, -1.0)
                print("______________________________________________\n")
                self.check_restart_conditions()
                continue

            removal_consensus = self.find_removal_consensus(
                comment.body, subreddit_name)
            agreement_score = removal_consensus['agreement_score']
            norm_violation_score = removal_consensus['norm_violation_score']

            print("Agreement score from Crossmod API:", agreement_score)
            print("Norm violation score from Crossmod API:",
                  norm_violation_score)

            action = check_config(removal_consensus)

            ### Write to CrossmodDB
            self._record_comment(comment, action, agreement_score,
                                 norm_violation_score)

            # Taken before the action is performed, so the reported time does
            # not include the moderation call itself.
            end = time.time()

            if self.should_perform_action(subreddit_name):
                self.perform_action(comment, action, agreement_score,
                                    norm_violation_score)

            print("Processing time for comment:", end - start, "seconds")
            print("______________________________________________\n")
            self.check_restart_conditions()
#app.config['CELERY_BROKER_URL'] = 'redis://localhost:6379/0' #app.config['CELERY_RESULT_BACKEND'] = 'redis://localhost:6379/0' # #celery = Celery(app.name, broker = app.config['CELERY_BROKER_URL']) #celery.conf.timezone = 'EST' #import crossmod.tasks # Tell our app about views and model. This is dangerously close to a # circular import, which is naughty, but Flask was designed that way. # (Reference http://flask.pocoo.org/docs/patterns/packages/) We're # going to tell pylint and pycodestyle to ignore this coding style violation. import crossmod.environments # noqa: E402 pylint: disable=wrong-import-position import crossmod.views # noqa: E402 pylint: disable=wrong-import-position import crossmod.ml # noqa: E402 pylint: disable=wrong-import-position import crossmod.helpers # noqa: E402 pylint: disable=wrong-import-position import crossmod.api # noqa: E402 pylint: disable=wrong-import-position clf_ensemble = None from crossmod.db.interface import CrossmodDB db_interface = CrossmodDB() sys.stdout.flush() sys.stderr.flush() @crossmod.app.teardown_appcontext def cleanup(resp_or_exc): crossmod.db_interface.database_session.remove()
def main():
    """Print the configured database path, then construct the database
    interface."""
    db_location = CrossmodConsts.DB_PATH
    print(db_location)
    # The instance is not used any further in the visible body; only its
    # construction takes place.
    database = CrossmodDB()
from crossmod.ml.classifiers import CrossmodClassifiers
from crossmod.db.interface import CrossmodDB
from crossmod.db.tables import DataTable
from crossmod.helpers.consts import CrossmodConsts
from datetime import datetime, timedelta

# One-off maintenance script: walks the DataTable rows ingested during the
# last week and sets crossmod_action to "report" for every non-filtered row
# whose agreement_score is at least 0.85.

db = CrossmodDB()
#classifiers = CrossmodClassifiers()

last_week = datetime.now() - timedelta(weeks=1)

# Running index of the non-filtered rows visited (used in the progress output)
count = 0
for row in db.database_session.query(DataTable).filter(
        DataTable.ingested_utc >= last_week).all():
    # Rows recorded as "filtered" are left untouched
    if row.crossmod_action == "filtered":
        print("Skipping")
        continue
    print(
        f"Modifying row: {count}, comment id: {row.id}, ingested_utc: {row.ingested_utc}"
    )
    # Disabled: re-score the comment body instead of reusing the stored scores
    #result = classifiers.get_result(row.body)
    #row.agreement_score = result['agreement_score']
    #row.norm_violation_score = result['norm_violation_score']
    if row.agreement_score >= 0.85:
        row.crossmod_action = "report"
    count += 1

# Persist all modifications in a single commit
db.database_session.commit()
# Release the session with remove(), the same call the application's teardown
# hook uses on this session object. The previous `.exit()` call is not a
# session method and raised AttributeError after the commit.
db.database_session.remove()
class CrossmodSubredditMonitor():
    """Provides an interface to monitor multiple subreddits by querying
    Crossmod's API"""

    def __init__(self):
        """Initializes CrossmodMonitor members"""
        # Crossmod database interface
        self.db = CrossmodDB()

        # PRAW interface to monitor subreddits
        self.reddit = praw.Reddit(
            user_agent=CrossmodConsts.REDDIT_USER_AGENT,
            client_id=CrossmodConsts.REDDIT_CLIENT_ID,
            client_secret=CrossmodConsts.REDDIT_CLIENT_SECRET,
            username=CrossmodConsts.REDDIT_USERNAME,
            password=CrossmodConsts.REDDIT_PASSWORD)

        # Query database to find which subreddits to listen to and whether to
        # only simulate moderation actions for each subreddit
        self.perform_action_in_subreddit = {
            row.subreddit: row.perform_action
            for row in self.db.database_session.query(
                ActiveSubredditsTable).all()
        }

        # PRAW interface used to stream comments from subreddits
        self.subreddits_listener = self.reddit.subreddit("+".join([
            row.subreddit for row in self.db.database_session.query(
                ActiveSubredditsTable.subreddit).all()
        ]))

        # Account the client is logged in as
        self.me = self.reddit.user.me()

    def find_removal_consensus(self, comment, subreddit_name):
        """Finds removal consensus querying Crossmod's API.

        Args:
            comment: Comment text to score; sent as a one-element list.
            subreddit_name: Subreddit whose settings row supplies the
                comma-separated classifier lists for the request.

        Returns:
            The single element of the JSON list returned by the API.

        Raises:
            ValueError: If the response is not a list with exactly one item.
        """
        subreddit_settings = self.db.database_session \
            .query(SubredditSettingsTable) \
            .filter(SubredditSettingsTable.subreddit == subreddit_name) \
            .one()

        data = {
            "comments": [comment],
            "subreddit_list":
            subreddit_settings.subreddit_classifiers.split(','),
            "macro_norm_list": subreddit_settings.norm_classifiers.split(','),
            "key": CrossmodConsts.CLIENT_API_SUPER_KEY
        }
        result = requests.post(url=CrossmodConsts.CLIENT_API_ENDPOINT,
                               json=data).json()

        if not isinstance(result, list) or len(result) != 1:
            raise ValueError(
                f"Expected API response to be a list with a single comment, but got: {result}"
            )
        return result[0]

    def is_whitelisted(self, author, subreddit):
        """Checks whether `author` appears in the comma-separated moderator
        list stored for `subreddit`, or is the bot's own account."""
        moderator_list = self.db.database_session.query(
            SubredditSettingsTable.moderator_list).filter(
                SubredditSettingsTable.subreddit ==
                subreddit).one().moderator_list.split(",")
        moderator_list.append(self.me)
        return author in moderator_list

    def perform_action(self, comment, action, agreement_score,
                       norm_violation_score):
        """Carries out the moderation `action` chosen for `comment`.

        Args:
            comment: The PRAW comment to act on.
            action: One of "EMPTY" (do nothing), "remove", "report" or
                "modmail"; any other value is ignored.
            agreement_score: Score quoted in the removal / report message.
            norm_violation_score: Score quoted in the removal / report
                message.
        """
        if action == "EMPTY":
            return

        # Modmail is sent through the subreddit the comment was posted in.
        # This name was previously never bound in this method, so the "remove"
        # and "modmail" branches raised NameError.
        subreddit = comment.subreddit

        if action == "remove":
            print("Removing comment, and alerting moderator by modmail at:",
                  time.time())
            subreddit.modmail.create(
                "[Comment removal by Crossmod] Crossmod performed a comment removal!",
                f"Crossmod removed a comment with permalink [{comment.permalink}]",
                self.me)
            comment.mod.remove()
            message = f"[Comment removal by Crossmod] Comment removal consensus:\nAgreement Score {agreement_score}, Norm Violation Score {norm_violation_score}"
            comment.mod.send_removal_message(message,
                                             title='ignored',
                                             type='public')

        elif action == "report":
            print("Reporting a comment and sending it to report queue at:",
                  time.time())
            comment.report(
                f"Agreement Score: {agreement_score}/1.0, Norm Violation Score: {norm_violation_score}/1.0"
            )

        elif action == "modmail":
            print("Sending a modmail at:", time.time())
            subreddit.modmail.create(
                "[Alert by Crossmod] Comment exceeds removal consensus threshold!",
                f"A comment with permalink [{comment.permalink}] exceeded Crossmod's removal consensus threshold.",
                self.me)

    def _record_comment(self, comment, action, agreement_score,
                        norm_violation_score):
        """Writes one DataTable row describing how `comment` was handled."""
        # NOTE(review): comment.author is dereferenced unconditionally; confirm
        # it can never be None for streamed comments.
        self.db.write(DataTable,
                      created_utc=datetime.datetime.fromtimestamp(
                          comment.created_utc),
                      ingested_utc=datetime.datetime.now(),
                      id=comment.id,
                      body=comment.body,
                      crossmod_action=action,
                      author=comment.author.name,
                      subreddit=comment.subreddit.display_name,
                      banned_by=None,
                      banned_at_utc=None,
                      agreement_score=agreement_score,
                      norm_violation_score=norm_violation_score)

    def monitor(self):
        """Streams new comments from the configured subreddits, scores each
        one through Crossmod's API, records it in DataTable and, where the
        subreddit's perform_action flag is set, performs the resulting
        action."""
        print("Crossmod started monitoring at:", (datetime.datetime.now(
            pytz.timezone('EST'))).strftime('%Y-%m-%d %H:%M:%S'), "EST")
        print("Currently monitoring:",
              ", ".join(self.perform_action_in_subreddit))
        print()

        for comment in self.subreddits_listener.stream.comments(
                skip_existing=True):
            print("______________________________________________\n")
            start = time.time()

            if comment is None or comment.body == '[removed]':
                continue

            subreddit_name = comment.subreddit.display_name

            print(f"Posted in r/{subreddit_name}:")
            print("Comment ID:", comment.id, "\nComment Body:",
                  comment.body.replace('\n', ' '))

            # Whitelisted authors and comments caught by the filters are
            # recorded with sentinel scores and never sent to the API.
            if self.is_whitelisted(
                    comment.author,
                    subreddit_name) or CrossmodFilters.apply_filters(
                        comment.body):
                print("Filtering comment:", comment.id, comment.body)
                self._record_comment(comment, "filtered", -1.0, -1.0)
                continue

            removal_consensus = self.find_removal_consensus(
                comment.body, subreddit_name)
            agreement_score = removal_consensus['agreement_score']
            norm_violation_score = removal_consensus['norm_violation_score']

            print("Agreement score from Crossmod API:", agreement_score)
            print("Norm violation score from Crossmod API:",
                  norm_violation_score)

            action = check_config(removal_consensus)

            ### Write to CrossmodDB
            self._record_comment(comment, action, agreement_score,
                                 norm_violation_score)

            # Taken before the action is performed, so the reported time does
            # not include the moderation call itself.
            end = time.time()

            if self.perform_action_in_subreddit[subreddit_name]:
                self.perform_action(comment, action, agreement_score,
                                    norm_violation_score)

            print("Processing time for comment:", end - start, "seconds")
            print("______________________________________________\n")
def __init__(self):
    """Create the database handle, define the axis labels and load the
    agreement-score-vs-count data."""
    # Crossmod database interface
    self.db = CrossmodDB()

    # Field name associated with each axis
    self.axises = dict(x='agreement_score', y='number_of_comments')

    # Loaded last, after the attributes above have been set
    loaded = self.read_agreement_score_vs_numbers()
    self.agreement_score_vs_numbers = loaded
class CrossmodDataTableUpdater:
    """Refreshes the banned_by / banned_at_utc columns of DataTable rows from
    Reddit and records each refresh run in UpdateStatusTable."""

    def __init__(self):
        """Sets the body markers, opens the database session and logs in to
        Reddit."""
        self.REMOVED = "[removed]"
        self.DELETED = "[deleted]"
        self.db = CrossmodDB()
        self.session = self.db.database_session

        #setup the Reddit bot
        self.reddit = praw.Reddit(
            user_agent=CrossmodConsts.REDDIT_USER_AGENT,
            client_id=CrossmodConsts.UPDATER_REDDIT_CLIENT_ID,
            client_secret=CrossmodConsts.UPDATER_REDDIT_CLIENT_SECRET,
            username=CrossmodConsts.REDDIT_USERNAME,
            password=CrossmodConsts.REDDIT_PASSWORD)

    def update_database_values(self):
        """Refreshes moderation info for the next batch of DataTable rows.

        The first run (no UpdateStatusTable rows yet) covers every DataTable
        row. Later runs cover the rows ingested within the seven days that
        follow the last row handled by the previous run. When at least one
        row was processed, a new UpdateStatusTable row is written with the
        run's timings, the row count and the id of the last row processed.
        """
        print("Starting data table update!")
        status_count = self.session.query(UpdateStatusTable).count()

        if status_count == 0:
            rows = self.session.query(DataTable)
        else:
            # Resume from the row recorded by the most recent run
            starting_row_id = self.session.query(UpdateStatusTable).order_by(
                UpdateStatusTable.id.desc()).first().last_row_id
            starting_row = self.session.query(DataTable).filter(
                DataTable.id == starting_row_id).first()
            rows = self.session.query(DataTable).filter(
                DataTable.ingested_utc > starting_row.ingested_utc,
                DataTable.ingested_utc <=
                starting_row.ingested_utc + datetime.timedelta(days=7))

        # Process oldest-first so the recorded checkpoint is the newest row of
        # the batch and the next run does not revisit rows handled here.
        rows = rows.order_by(DataTable.ingested_utc)

        update_start_utc = datetime.datetime.now()
        rows_updated = 0
        last_row_id = None
        total = rows.count()

        for row in rows:
            # Track the checkpoint on every iteration. Binding it only when
            # `rows_updated == total - 1` left it unset or stale whenever the
            # iterated rows did not match the earlier count().
            last_row_id = row.id
            self.change_moderated_value(row, rows_updated)
            print("{} rows of {}\r\n".format(rows_updated, total))
            rows_updated += 1

        update_end_utc = datetime.datetime.now()
        self.db.database_session.commit()

        if rows_updated > 0:
            self.db.write(UpdateStatusTable,
                          id=status_count + 1,
                          update_start_utc=update_start_utc,
                          update_end_utc=update_end_utc,
                          rows_updated=rows_updated,
                          last_row_id=last_row_id)

    def change_moderated_value(self, row, count):
        """Copies banned_by / banned_at_utc from Reddit onto `row`.

        Args:
            row: DataTable row whose id is the Reddit comment id.
            count: Zero-based position of the row in the current run; the
                session is committed whenever it is a multiple of 20.
        """
        comment = self.reddit.comment(id=row.id)
        if comment.banned_at_utc is not None and comment.banned_by is not None:
            row.banned_by = comment.banned_by
            row.banned_at_utc = datetime.datetime.fromtimestamp(
                comment.banned_at_utc)
        # Commit in batches of 20 rows rather than once per row
        if count % 20 == 0:
            self.db.database_session.commit()
def main():
    """Command-line entry point: runs Crossmod on a single subreddit.

    Expects three command-line arguments: the subreddit name, a 1/0
    perform-action flag and a 1/0 use-classifiers flag. The third argument is
    required but currently ignored (use_classifiers is fixed to 1). Prints
    usage and exits with status 1 when the argument count is wrong.
    """
    ###main()
    # Usage: python3 crossmod.py modbot_staging 1 1
    if len(sys.argv) != 4:
        print("Usage: python3 crossmod.py <subreddit-name> <perform-action [1, 0]> <use-classifiers [1, 0]>")
        print("Example:")
        print(" python3 crossmod.py modbot_staging 1 1")
        print(" starts Crossmod to run on the subreddit modbot_staging, will actively flag comments and use Crossmod's ML backend")
        # sys.exit is always available; the bare exit() helper is only
        # installed by the site module.
        sys.exit(1)

    staging_subreddit = sys.argv[1]
    perform_action = bool(int(sys.argv[2]))
    use_classifiers = 1  #int(sys.argv[3])

    print("Staging subredddit: ", staging_subreddit)
    print("Perform action: ", perform_action)
    print("Use classifiers: ", use_classifiers)

    #setup the Reddit bot
    reddit = praw.Reddit(user_agent=CrossmodConsts.REDDIT_USER_AGENT,
                         client_id=CrossmodConsts.REDDIT_CLIENT_ID,
                         client_secret=CrossmodConsts.REDDIT_CLIENT_SECRET,
                         username=CrossmodConsts.REDDIT_USERNAME,
                         password=CrossmodConsts.REDDIT_PASSWORD)

    print("Staging subreddit")
    subreddit = reddit.subreddit(staging_subreddit)  #Select the subreddit for Crossmod to work on
    db = CrossmodDB()

    # Look the logged-in account up once and reuse it below
    me = reddit.user.me()

    print(subreddit.title)  #Prints title of subreddit
    print(subreddit.description)  #Prints description of subreddit
    print(me)  #Prints your username

    moderators_list = ["thebiglebowskiii",
                       "AutoModerator",
                       "TransPlanetInjection",
                       "Xenophon1",
                       "ion-tom",
                       "mind_bomber",
                       "Gobi_The_Mansoe",
                       "multi-mod",
                       "Buck-Nasty",
                       "Yosarian2",
                       "ImLivingAmongYou",
                       "lughnasadh"]

    ###list of white-listed authors whose content the bot would ignore:
    ###the bot's own account first, followed by the moderators
    whitelisted_authors = [me.name] + moderators_list

    ###list of subreddits to use for voting (i.e., aggregating the predictions from back-end ensemble of classifiers)
    subreddit_list = CrossmodConsts.SUBREDDIT_LIST
    macro_norm_list = CrossmodConsts.NORM_LIST

    classifiers = crossmod.clf_ensemble

    process_comments(subreddit,
                     classifiers,
                     db,
                     whitelisted_authors,
                     subreddit_list,
                     macro_norm_list,
                     use_classifiers,
                     perform_action)

    # Release the session with remove(), the same call the application's
    # teardown hook uses on this session object. The previous `.exit()` call
    # is not a session method and raised AttributeError.
    db.database_session.remove()