def process_claim(post, config):
    """
    Handles comment replies containing the word 'claim' and routes
    based on a basic decision tree.

    The tree is: ignore posts not authored by 'transcribersofreddit';
    ask the user to accept the code of conduct if they have not yet;
    otherwise reply according to the current flair of the top-level post
    (unclaimed -> claim succeeds, in progress -> already claimed,
    completed -> already complete).

    :param post: The Comment object containing the claim.
    :param config: the global config dict.
    :return: None.
    :raises praw.exceptions.APIException: re-raised for any API error
        other than 'DELETED_COMMENT'.
    """
    top_parent = get_parent_post_id(post, config.r)

    # WAIT! Do we actually own this post?
    if top_parent.author.name != 'transcribersofreddit':
        logging.debug('Received `claim` on post we do not own. Ignoring.')
        return

    try:
        if not coc_accepted(post, config):
            # do not cache this page. We want to get it every time.
            post.reply(
                _(please_accept_coc.format(
                    get_wiki_page('codeofconduct', config))))
            return

        # Work on a local copy of the flair text for the rest of the
        # decision tree.
        # NOTE(review): flair_post is not visible here; this assumes it may
        # leave top_parent.link_flair_text untouched, in which case testing
        # `flair.unclaimed in None` would raise TypeError.
        flair_text = top_parent.link_flair_text

        # this can be either '' or None depending on how the API is feeling
        # today
        if flair_text in ('', None):
            # There exists the very small possibility that the post was
            # malformed and doesn't actually have flair on it. In that case,
            # set the unclaimed flair and carry on as if it had been there.
            flair_post(top_parent, flair.unclaimed)
            flair_text = flair.unclaimed

        if flair.unclaimed in flair_text:
            # `in` rather than `==`:
            # need to get that "Summoned - Unclaimed" in there too
            post.reply(_(claim_success))
            flair_post(top_parent, flair.in_progress)
            logging.info(
                f'Claim on ID {top_parent.fullname} by {post.author} successful'
            )

        # can't claim something that's already claimed
        elif flair_text == flair.in_progress:
            post.reply(_(already_claimed))
        elif flair_text == flair.completed:
            post.reply(_(claim_already_complete))

    except praw.exceptions.APIException as e:
        if e.error_type == 'DELETED_COMMENT':
            logging.info(
                f'Comment attempting to claim ID {top_parent.fullname} has '
                f'been deleted. Back up for grabs! ')
            return
        raise  # anything other than a deleted comment is unexpected
def run(config):
    """
    Process at most one queued post: OCR its image and post the result.

    Pops one ID from the 'ocr_ids' redis list, runs `process_image` on the
    submission's URL and replies to the matching ToR post (looked up in
    redis under the popped ID) with the text, split into chunks of 9000
    characters that are chained as replies to each other.

    This function is deliberately not a loop; per the original note it is
    run inside a loop by tor_core.

    :param config: the global config object; `ocr_delay`, `redis` and `r`
        are read from it.
    :return: None.
    """
    time.sleep(config.ocr_delay)
    new_post = config.redis.lpop('ocr_ids')
    if new_post is None:
        logging.debug('No post found. Sleeping.')
        # nothing new in the queue. Wait and try again.
        return

    # We got something!
    new_post = new_post.decode('utf-8')
    logging.info(f'Found a new post, ID {new_post}')
    url = config.r.submission(id=clean_id(new_post)).url

    try:
        result = process_image(url)
    except OCRError as e:
        logging.warning('There was an OCR Error: ' + str(e))
        return

    logging.debug(f'result: {result}')

    if not result:
        logging.info('Result was none! Skipping!')
        # we don't want orphan entries
        config.redis.delete(new_post)
        return

    tor_post_id = config.redis.get(new_post)
    if tor_post_id is None:
        # Without the mapping there is nowhere to post the transcription;
        # bail out instead of crashing on `None.decode`.
        logging.warning(
            f'No ToR post recorded for {new_post}; skipping transcription.')
        return
    tor_post_id = tor_post_id.decode('utf-8')

    logging.info(
        f'posting transcription attempt for {new_post} on {tor_post_id}')

    tor_post = config.r.submission(id=clean_id(tor_post_id))

    thing_to_reply_to = tor_post.reply(
        _(base_comment.format(result['process_time_in_ms'] / 1000)))

    # Applied in order to every chunk: double the line breaks and
    # backslash-escape user/subreddit references and '>>'.
    replacements = (
        ('\r\n', '\n\n'),
        ('/u/', '\\/u/'),
        ('/r/', '\\/r/'),
        (' u/', ' \\/u/'),
        (' r/', ' \\/r/'),
        ('>>', '\\>\\>'),
    )

    for chunk in chunks(result['text'], 9000):
        # end goal: if something is over 9000 characters long, we
        # should post a top level comment, then keep replying to
        # the comments we make until we run out of chunks.
        for needle, escaped in replacements:
            chunk = chunk.replace(needle, escaped)
        thing_to_reply_to = thing_to_reply_to.reply(_(chunk))

    config.redis.delete(new_post)
def process_override(reply, config):
    """
    Let a ToR moderator push through a `done` that the bot refused.

    The "!override" comment is expected to be a reply to the bot's comment
    saying that it cannot find the transcript; the comment above that one
    is then handed to `process_done` with validation skipped.

    :param reply: the comment reply object from the moderator.
    :param config: the global config object.
    :return: None.
    """
    # Keep this gate: unlike the other admin commands this one arrives as a
    # comment reply rather than a PM, so nothing else covers it.
    if not from_moderator(reply, config):
        reply.reply(_(random.choice(config.no_gifs)))
        logging.info(f'{reply.author.name} just tried to override. Lolno.')
        return

    # Walk two levels up: first the bot's "can't find it" comment, then the
    # comment it was answering, which should carry the `done` call.
    bot_comment = config.r.comment(id=clean_id(reply.parent_id))
    done_comment = config.r.comment(id=clean_id(bot_comment.parent_id))

    if 'done' not in done_comment.body.lower():
        return

    logging.info(
        f'Starting validation override for post {done_comment.fullname}, '
        f'approved by {reply.author.name}')
    process_done(done_comment, config, override=True)
def update_and_restart(reply, config):
    """
    Handle an update request sent as a comment reply.

    Requests from non-moderators are answered with a random entry from
    `config.no_gifs` and logged. Requests from moderators currently do
    nothing.

    :param reply: the comment reply object carrying the request.
    :param config: the global config object.
    :return: None.
    """
    if from_moderator(reply, config):
        # Nothing is done for moderators in this version.
        return

    reply.reply(_(random.choice(config.no_gifs)))
    logging.info('{} just issued update. No.'.format(reply.author.name))
def process_override(reply, config):
    """
    Let a ToR moderator push through a `done` that the bot refused.

    The "!override" comment is expected to be a reply to the bot's comment
    saying that it cannot find the transcript; the comment above that one
    is then handed to `process_done` with validation skipped.

    :param reply: the comment reply object from the moderator.
    :param config: the global config object.
    :return: None.
    """
    # Only moderators may do this; everyone else gets a "no" gif.
    if not from_moderator(reply, config):
        reply.reply(_(random.choice(config.no_gifs)))
        logging.info(
            '{} just tried to override. Lolno.'.format(reply.author.name))
        return

    # Walk two levels up: first the bot's "can't find it" comment, then the
    # comment it was answering, which should carry the `done` call.
    bot_comment = config.r.comment(id=clean_id(reply.parent_id))
    done_comment = config.r.comment(id=clean_id(bot_comment.parent_id))

    if 'done' not in done_comment.body.lower():
        return

    logging.info(
        'Starting validation override for post {}, approved by {}'.format(
            done_comment.fullname, reply.author.name))
    process_done(done_comment, config, override=True)
def process_thanks(post, config):
    """
    Reply to a thank-you comment with a randomly chosen thumbs-up gif.

    :param post: the Comment object to reply to.
    :param config: the global config object (not used here).
    :return: None.
    :raises praw.exceptions.APIException: re-raised for any API error
        other than 'DELETED_COMMENT'.
    """
    try:
        gif = random.choice(thumbs_up_gifs)
        message = youre_welcome.format(gif)
        post.reply(_(message))
    except praw.exceptions.APIException as e:
        if e.error_type != 'DELETED_COMMENT':
            raise
        # The comment vanished before we could answer it; nothing to do.
        logging.debug('Comment requiring thanks was deleted')
def reload_config(reply, config):
    """
    Re-run `initialize(config)` when a moderator asks for a reload.

    Requests from anyone else are logged and answered with a random entry
    from `config.no_gifs`.

    :param reply: the comment reply object carrying the request.
    :param config: the global config object.
    :return: None.
    """
    if from_moderator(reply, config):
        logging.info(
            'Reloading configs at the request of {}'.format(
                reply.author.name))
        initialize(config)
        logging.info('Reload complete.')
        return

    logging.info(
        '{} just issued a reload command. No.'.format(reply.author.name))
    reply.reply(_(random.choice(config.no_gifs)))
def update_and_restart(reply, config):
    """
    Pull the latest code and restart the bot at a moderator's request.

    Requests from non-moderators are answered with a random entry from
    `config.no_gifs` and logged. For moderators the function runs
    `git pull origin master` and then re-executes the current interpreter
    with the original arguments, so it does not return in that case.

    :param reply: the comment reply object carrying the request.
    :param config: the global config object.
    :return: None.
    """
    if not from_moderator(reply, config):
        reply.reply(_(random.choice(config.no_gifs)))
        logging.info('{} just issued update. No.'.format(reply.author.name))
        return

    # update from repo
    sh.git.pull("origin", "master")

    # restart own process: replace it with a fresh interpreter running the
    # same command line
    interpreter = sys.executable
    os.execl(interpreter, interpreter, *sys.argv)
def process_claim(post, config):
    """
    Handles comment replies containing the word 'claim' and routes
    based on a basic decision tree.

    The tree is: ignore posts not authored by 'transcribersofreddit';
    ask the user to accept the code of conduct if they have not yet;
    otherwise reply according to the current flair of the top-level post
    (unclaimed -> claim succeeds, in progress -> already claimed,
    completed -> already complete).

    :param post: The Comment object containing the claim.
    :param config: the global config dict.
    :return: None.
    """
    top_parent = get_parent_post_id(post, config.r)

    # WAIT! Do we actually own this post?
    if top_parent.author.name != 'transcribersofreddit':
        logging.debug('Received `claim` on post we do not own. Ignoring.')
        return

    if not coc_accepted(post, config):
        # do not cache this page. We want to get it every time.
        post.reply(_(
            please_accept_coc.format(
                get_wiki_page('codeofconduct', config.tor))
        ))
        return

    # Work on a local copy of the flair text for the rest of the decision
    # tree.
    # NOTE(review): flair_post is not visible here; this assumes it may
    # leave top_parent.link_flair_text untouched, in which case testing
    # `flair.unclaimed in None` would raise TypeError.
    flair_text = top_parent.link_flair_text

    if not flair_text:
        # There exists the very small possibility that the post was malformed
        # and doesn't actually have flair on it (None or an empty string).
        # In that case, set the unclaimed flair and carry on as if it had
        # been there.
        flair_post(top_parent, flair.unclaimed)
        flair_text = flair.unclaimed

    if flair.unclaimed in flair_text:
        # `in` rather than `==`:
        # need to get that "Summoned - Unclaimed" in there too
        post.reply(_(claim_success))
        flair_post(top_parent, flair.in_progress)
        logging.info(
            'Claim on ID {} by {} successful'.format(
                top_parent.fullname, post.author
            )
        )

    # can't claim something that's already claimed
    elif flair_text == flair.in_progress:
        post.reply(_(already_claimed))
    elif flair_text == flair.completed:
        post.reply(_(claim_already_complete))
def process_mention(mention):
    """
    Answer a username mention with a private message to its author.

    The message uses `pm_subject` as its subject and `pm_body` (passed
    through `_`) as its body.

    :param mention: the Comment object containing the username mention.
    :return: None.
    """
    summoner = mention.author

    # message format is subject, then body
    summoner.message(pm_subject, _(pm_body))
    logging.info(f'Message sent to {summoner.name}!')
def process_done(post, config, override=False):
    """
    Handles comments where the user says they've completed a post.
    Also includes a basic decision tree to enable verification of
    the posts to try and make sure they actually posted a
    transcription.

    Outcomes, by flair of the top-level post:

    * contains the unclaimed flair: the user is told the post is still
      unclaimed.
    * equals the in-progress flair: unless `override` is set, the
      transcript is verified first; on failure the user is told it cannot
      be found and nothing else happens. Otherwise the user is
      congratulated, their flair is updated, the post is flaired as
      completed and the 'total_completed' redis counter is incremented.
    * anything else: nothing happens.

    :param post: the Comment object which contains the string 'done'.
    :param config: the global config object.
    :param override: A parameter that can only come from process_override()
        and skips the validation check.
    :return: None.
    :raises praw.exceptions.APIException: re-raised for any API error
        other than 'DELETED_COMMENT'.
    """
    top_parent = get_parent_post_id(post, config.r)

    # WAIT! Do we actually own this post?
    if top_parent.author.name != 'transcribersofreddit':
        logging.info('Received `done` on post we do not own. Ignoring.')
        return

    try:
        # NOTE(review): if link_flair_text is None this `in` test raises
        # TypeError; nothing here guards against a post without flair.
        if flair.unclaimed in top_parent.link_flair_text:
            post.reply(_(done_still_unclaimed))
        elif top_parent.link_flair_text == flair.in_progress:
            if not override and not verified_posted_transcript(post, config):
                # we need to double-check these things to keep people
                # from gaming the system
                logging.info(
                    f'Post {top_parent.fullname} does not appear to have a '
                    f'post by claimant {post.author}. Hrm... '
                )
                # noinspection PyUnresolvedReferences
                try:
                    post.reply(_(done_cannot_find_transcript))
                except praw.exceptions.ClientException as e:
                    # We've run into an issue where someone has commented and
                    # then deleted the comment between when the bot pulls mail
                    # and when it processes comments. This should catch that.
                    # Possibly should look into subclassing praw.Comment.reply
                    # to include some basic error handling of this so that
                    # we can fix it throughout the application.
                    logging.warning(e)
                # Validation failed: stop here whether or not the reply
                # above went through.
                return

            # Control flow:
            # If we have an override, we end up here to complete.
            # If there is no override, we go into the validation above.
            # If the validation fails, post the apology and return.
            # If the validation succeeds, come down here.

            if override:
                logging.info('Moderator override starting!')
            # noinspection PyUnresolvedReferences
            try:
                post.reply(_(done_completed_transcript))
                update_user_flair(post, config)
                logging.info(
                    f'Post {top_parent.fullname} completed by {post.author}!'
                )
            except praw.exceptions.ClientException:
                # If the butt deleted their comment and we're already this
                # far into validation, just mark it as done. Clearly they
                # already passed.
                logging.info(
                    f'Attempted to mark post {top_parent.fullname} '
                    f'as done... hit ClientException.'
                )
            # Runs on both the success and the ClientException path above.
            flair_post(top_parent, flair.completed)

            config.redis.incr('total_completed', amount=1)

    except praw.exceptions.APIException as e:
        if e.error_type == 'DELETED_COMMENT':
            logging.info(
                f'Comment attempting to mark ID {top_parent.fullname} '
                f'as done has been deleted'
            )
            return
        raise  # Re-raise exception if not
def process_mention(mention, config):
    """
    Turn a username mention into a transcription request on ToR.

    The content the mention points at (its parent comment, or the
    submission when the mention is a root comment) is submitted to
    `config.tor`, the new submission gets the rules and "summoned by"
    comments plus the summoned-unclaimed flair, and the caller is told the
    transcribers are on their way. Mentions made by the author of that
    content are ignored, as is content that `is_valid` rejects.

    :param mention: the Comment object containing the username mention.
    :param config: the global config dict
    :return: None.
    """
    # Comments and submissions are handled separately because the way to
    # get a permalink differs between the two objects. Laaaame.
    if mention.is_root:
        # this is a post.
        parent = config.r.submission(id=clean_id(mention.link_id))
        parent_permalink = parent.permalink
        # format that sucker so it looks right in the template.
        parent.title = '"' + parent.title + '"'
    else:
        # this comment is in reply to something. Let's grab a comment object.
        parent = config.r.comment(id=clean_id(mention.parent_id))
        parent_permalink = parent.permalink()
        # a comment does not have a title attribute. Let's fake one by giving
        # it something to work with.
        parent.title = 'Unknown Content'

    # Ignore requests made by the OP of content or the OP of the submission
    if mention.author == parent.author:
        logging.info(
            'Ignoring mention by OP u/{} on ID {}'.format(
                mention.author, mention.parent_id))
        return

    logging.info(
        'Posting call for transcription on ID {}'.format(mention.parent_id))

    # we're only doing this if we haven't seen this one before.
    if not is_valid(parent.fullname, config):
        return

    # noinspection PyBroadException
    try:
        submit_title = summoned_submit_title.format(
            sub=mention.subreddit.display_name,
            commentorpost=parent.__class__.__name__.lower(),
            title=parent.title)
        result = config.tor.submit(
            title=submit_title,
            url=reddit_url.format(parent_permalink))

        result.reply(
            _(rules_comment_unknown_format.format(header=config.header)))

        summoning_comment = config.r.comment(clean_id(mention.fullname))
        summons_url = reddit_url.format(summoning_comment.permalink())
        result.reply(_(summoned_by_comment.format(summons_url)))

        flair_post(result, flair.summoned_unclaimed)

        logging.debug(
            'Posting success message in response to caller, u/{}'.format(
                mention.author))
        mention.reply(
            _('The transcribers have been summoned! Please be patient '
              'and we\'ll be along as quickly as we can.'))
        add_complete_post_id(parent.fullname, config)

    # I need to figure out what errors can happen here
    except Exception as e:
        logging.error(
            '{} - Posting failure message in response to caller, '
            'u/{}'.format(e, mention.author))
        mention.reply(_(something_went_wrong))
def process_post(new_post, config):
    """
    Post a discovered submission to ToR as a workable transcription job.

    Posts are skipped when they fall under the subreddit's upvote
    threshold, were already handled, are archived, or have no author.
    YouTube videos that are invalid or already have transcripts are marked
    as handled without being posted.

    :param new_post: the submission to post, accessed like a dict (keys
        read here: subreddit, ups, name, archived, author, domain, url,
        title, permalink).
    :param config: the config object.
    :return: None.
    """
    subreddit = new_post['subreddit']
    # ignore posts if they don't meet the threshold for karma and the sub
    # is in our list of upvoted filtered ones
    if (subreddit in config.upvote_filter_subs
            and new_post['ups'] < config.upvote_filter_subs[subreddit]):
        return

    post_id = new_post['name']
    if not is_valid(post_id, config):
        logging.debug(id_already_handled_in_db.format(post_id))
        return

    if new_post['archived']:
        return

    # we don't want to handle deleted posts, that's just silly
    if new_post['author'] is None:
        return

    logging.info(
        f'Posting call for transcription on ID {post_id} posted by '
        f'{new_post["author"]}')

    domain = new_post['domain']
    if domain in config.image_domains:
        content_type, content_format = 'image', config.image_formatting
    elif domain in config.audio_domains:
        content_type, content_format = 'audio', config.audio_formatting
    elif domain in config.video_domains:
        if 'youtu' in domain:
            video_url = new_post['url']
            if not valid_youtube_video(video_url):
                add_complete_post_id(post_id, config)
                return
            if get_yt_transcript(video_url):
                np = config.r.submission(id=post_id)
                np.reply(_(yt_already_has_transcripts))
                add_complete_post_id(post_id, config)
                logging.info(
                    f'Found YouTube video, {get_yt_video_id(video_url)},'
                    f' with good transcripts.')
                return
        content_type, content_format = 'video', config.video_formatting
    else:
        # This means we pulled from a subreddit bypassing the filters.
        content_type, content_format = 'Other', config.other_formatting

    # Truncate a post title if it exceeds 250 characters, so the added
    # formatting still fits in Reddit's 300 char limit for post titles
    max_title_length = 250
    full_title = new_post['title']
    if len(full_title) > max_title_length:
        post_title = full_title[:max_title_length - 3] + '...'
    else:
        post_title = full_title

    # noinspection PyBroadException
    try:
        submit_title = discovered_submit_title.format(
            sub=subreddit,
            type=content_type.title(),
            title=post_title)
        result = config.tor.submit(
            title=submit_title,
            url=reddit_url.format(new_post['permalink']))

        rules = rules_comment.format(
            post_type=content_type,
            formatting=content_format,
            header=config.header)
        result.reply(_(rules))

        flair_post(result, flair.unclaimed)
        add_complete_post_id(post_id, config)
        config.redis.incr('total_posted', amount=1)

        if config.OCR and content_type == 'image':
            # hook for OCR bot; in order to avoid race conditions, we add the
            # key / value pair that the bot isn't looking for before adding
            # to the set that it's monitoring.
            config.redis.set(post_id, result.fullname)
            config.redis.rpush('ocr_ids', post_id)

        config.redis.incr('total_new', amount=1)

    # The only errors that happen here are on Reddit's side -- pretty much
    # exclusively 503s and 403s that arbitrarily resolve themselves. A missed
    # post or two is not the end of the world.
    except Exception as e:
        logging.error(
            f'{e} - unable to post content.\nID: {post_id}\n '
            f'Title: {full_title}\n Subreddit: '
            f'{subreddit}')