def test_whitelist(self):
    """
    Check that the offensive classifier doesn't recognize phrases in the
    whitelist, but still recognizes offensive phrases elsewhere in the text.
    """
    self.assertFalse(contains_offensive("have you seen kill bill"))
    self.assertTrue(contains_offensive("f**k have you seen kill bill"))
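
# The whitelist behavior tested above implies that contains_offensive masks
# whitelisted phrases before matching. A minimal sketch of that idea; the
# names WHITELIST, OFFENSIVE_PHRASES, and this implementation are illustrative
# assumptions, not the production classifier:
import re

WHITELIST = {"kill bill"}             # hypothetical whitelisted phrases
OFFENSIVE_PHRASES = {"kill", "f**k"}  # hypothetical blocklist entries

def contains_offensive_sketch(text: str) -> bool:
    """Return True if text contains an offensive phrase outside whitelisted spans."""
    for phrase in WHITELIST:
        text = text.replace(phrase, " ")  # mask whitelisted spans first
    return any(re.search(r"(^|\s)" + re.escape(p) + r"($|\s)", text)
               for p in OFFENSIVE_PHRASES)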
def should_link(span: str, include_common_phrases: bool) -> bool:
    """
    Determines whether we should attempt to link a span to entities.

    If include_common_phrases is False, spans consisting entirely of
    DONT_LINK_WORDS are not linked. If include_common_phrases is True,
    multi-word spans (ngrams with n>1) consisting entirely of DONT_LINK_WORDS
    are linked; spans which are a single DONT_LINK_WORD are still not linked.
    """
    if span in MANUAL_SPAN2ENTINFO:
        return True
    elif contains_offensive(
            span,
            'Span "{}" contains offensive phrase "{}" so will not try to entity link'):
        return False
    # If span consists entirely of DONT_LINK_WORDS, only include if it's
    # multi-word and include_common_phrases=True
    elif all(is_dont_link_word(word) for word in span.split()):
        if len(span.split()) > 1 and include_common_phrases:
            return True
        else:
            logger.debug(f'Not linking span "{span}" because it consists of DONT_LINK_WORDS')
            return False
    else:
        return True
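
# is_dont_link_word is referenced above but not defined in this snippet. A
# plausible one-liner, assuming DONT_LINK_WORDS is a set of lowercased common
# words (the set contents here are made up):
DONT_LINK_WORDS = {"the", "a", "my", "love", "like"}  # hypothetical contents

def is_dont_link_word(word: str) -> bool:
    """Return True if word is a common word we never link on its own."""
    return word.lower() in DONT_LINK_WORDS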
def remove_offensive(self, ranked_results: RankedResults) -> RankedResults:
    """
    Check the top-ranked response/prompt in ranked_results for offensiveness.
    If it's inoffensive, do nothing. If it's offensive, remove it from
    ranked_results, and start again by checking the second-ranked
    response/prompt.

    Arguments:
        ranked_results: RankedResults (responses or prompts from RGs).

    Returns:
        ranked_results, potentially with some results removed, so that the
        top result is guaranteed to be inoffensive.
    """
    top_result = ranked_results.top_result
    top_rg = ranked_results.top_rg
    logger.info(f'Checking top-priority {type(top_result).__name__} from {top_rg} '
                f'for offensiveness: "{top_result.text}"')
    if contains_offensive(top_result.text):
        logger.error(f'{top_rg} gave an offensive result (i.e. the contains_offensive '
                     f'function returned True). This should be caught inside the RG! '
                     f'Offensive text: "{top_result.text}"')
        ranked_results.remove_result(top_rg)
        return self.remove_offensive(ranked_results)  # start again, checking the new top result
    else:
        return ranked_results
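
# remove_offensive only touches three members of RankedResults, and its
# recursion terminates because remove_result shrinks the collection by one on
# each call. A minimal sketch of that assumed interface (illustrative, not the
# real class):
from dataclasses import dataclass, field
from typing import Dict

@dataclass
class RankedResultsSketch:
    # maps RG name -> result, assumed ordered from highest to lowest priority
    results: Dict[str, "ResponseGeneratorResult"] = field(default_factory=dict)

    @property
    def top_rg(self) -> str:
        return next(iter(self.results))  # highest-priority RG still standing

    @property
    def top_result(self) -> "ResponseGeneratorResult":
        return self.results[self.top_rg]

    def remove_result(self, rg: str) -> None:
        del self.results[rg]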
def get_convpara_snippet(self, snippet: str, entity: WikiEntity) -> Optional[str]:
    if entity.name in CONVPARA_BLACKLISTED_ENTITIES:
        logger.primary_info(f"{entity} blacklisted for convpara")
        return None
    paraphrases = ConvPara(self.rg.state_manager).get_paraphrases(
        background=snippet, entity=entity.name)
    paraphrases = filter_and_log(lambda p: p.finished, paraphrases,
                                 "Paraphrases for snippet", "they were unfinished")
    paraphrases = filter_and_log(lambda p: not contains_offensive(p.readable_text()),
                                 paraphrases, "Paraphrases for snippet",
                                 "contained offensive phrase")
    # paraphrases = filter_and_log(lambda p: not did_you_know.execute(p.readable_text()),
    #                              paraphrases, "Paraphrases for snippet",
    #                              "contained did you know question")
    if not paraphrases:
        logger.warning(f"No good convparaphrases for snippet: \n {snippet}")
        return None
    paraphrases = sorted(paraphrases,
                         key=lambda p: self.ngram_recall([p.readable_text()], snippet, 1),
                         reverse=True)
    text = paraphrases[0].readable_text()
    if text[-1] not in ['.', '!', '?']:
        text += '.'
    return text
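
# ngram_recall is used above as a ranking key. A minimal sketch, assuming it
# measures the fraction of the source text's n-grams covered by the generated
# texts (an assumption about its semantics; the real helper may tokenize
# differently):
from typing import List

def ngram_recall(generations: List[str], source: str, n: int) -> float:
    """Fraction of the source's n-grams that appear in any generation."""
    def ngrams(text: str, n: int):
        tokens = text.lower().split()
        return {tuple(tokens[i:i + n]) for i in range(len(tokens) - n + 1)}
    source_ngrams = ngrams(source, n)
    if not source_ngrams:
        return 0.0
    generated = set().union(*(ngrams(g, n) for g in generations))
    return len(source_ngrams & generated) / len(source_ngrams)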
def get_opinions(phrase: str) -> Set[Opinion]:
    """This method takes in a phrase and returns a set of opinions.

    :param phrase: the opinionable phrase that we have identified
    :type phrase: str
    :return: a set of opinions that are not offensive
    :rtype: Set[Opinion]
    """
    # NOTE: phrase is interpolated directly into the SQL string, so it must be
    # trusted/sanitized upstream; a parameterized query would be safer.
    results = fetch_sql(f"""
        select distinct phrase, reason, attitude, sentiment
        from labeled_opinions
        where phrase = '{phrase}' and reason_appropriateness = 4""")
    opinions = set(parse_entry(entry) for entry in results)
    opinions = set(opinion for opinion in opinions
                   if not contains_offensive(opinion.reason))
    return opinions
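
# A safer shape for the query above, assuming a DB-API-style cursor with the
# %s paramstyle (fetch_sql's actual signature is unknown, so this hypothetical
# variant takes a cursor directly):
def fetch_opinion_rows(cursor, phrase: str):
    cursor.execute(
        """select distinct phrase, reason, attitude, sentiment
           from labeled_opinions
           where phrase = %s and reason_appropriateness = 4""",
        (phrase,))  # the driver escapes phrase, preventing SQL injection
    return cursor.fetchall()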
def select_thread(self, thread_list: List[dict], topic: str) -> Optional[dict]:
    """Selects the thread with maximum karma whose title contains no offensive phrases."""
    thread_list = [thread for thread in thread_list
                   if not offensive_classify.contains_offensive(thread['_source']['title'])]
    # For single-word topics, filter out threads whose title doesn't contain the topic word.
    if topic and len(topic.split()) == 1:
        thread_list = [thread for thread in thread_list
                       if topic in [word.lower() for word in thread['_source']['title'].split()]]
    if not thread_list:
        return None
    thread_list = sorted(thread_list,
                         key=lambda thread: int(thread['_source']['karma']),
                         reverse=True)
    return thread_list[0]['_source']
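
# An example of the thread shape select_thread assumes, with field names
# inferred from the accesses above (the values are made up):
sample_threads = [
    {'_source': {'title': 'Best sci-fi movies of the decade', 'karma': '152'}},
    {'_source': {'title': 'Why I love space documentaries', 'karma': '87'}},
]
# select_thread(sample_threads, 'movies') would return the first thread's
# '_source' dict: the second title lacks the topic word, and 152 > 87 karma.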
def _neural_response_filtering(responses: Tuple[str, ...]) -> List[str]:
    """
    The function where the actual filtering logic for basic filtering takes place.

    Args:
        responses (Tuple[str, ...]): each string is a possible response

    Returns:
        List[str]: each string is a filtered possible response
    """
    # remove duplicates
    responses = list(set(responses))
    # remove offensive responses
    responses = filter_and_log(lambda r: not contains_offensive(r), responses,
                               'neural_responses', 'they contain offensive phrases')
    # remove advice
    responses = filter_and_log(lambda r: not contains_advice(r), responses,
                               'neural_responses', 'they contain advice')
    return responses
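
# filter_and_log appears throughout these snippets. A minimal sketch with the
# signature inferred from the call sites (the real logging format is assumed):
import logging
from typing import Callable, Iterable, List, TypeVar

logger = logging.getLogger(__name__)
T = TypeVar('T')

def filter_and_log(predicate: Callable[[T], bool], items: Iterable[T],
                   items_name: str, reason: str) -> List[T]:
    """Keep items satisfying predicate; log how many were removed and why."""
    items = list(items)
    kept = [item for item in items if predicate(item)]
    num_removed = len(items) - len(kept)
    if num_removed:
        logger.info(f'Removed {num_removed}/{len(items)} {items_name} because {reason}')
    return kept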
def respond_til(self,
                state: State,
                entity: WikiEntity,
                preferences: Optional[ConvParaPreferences] = None,
                til_text=False) -> ResponseGeneratorResult:
    if entity.name in CONVPARA_BLACKLISTED_ENTITIES:
        raise CantRespondError(f"{entity} blacklisted for convpara")
    if not til_text:
        til_response = self.get_til(entity.name, state)
        if not til_response:
            raise CantRespondError("Not responding with more TILs")
        til_text, _, _ = til_response
    paraphrases = ConvPara(self.rg.state_manager).get_paraphrases(
        background=til_text, entity=entity.name)
    paraphrases = filter_and_log(lambda p: p.finished, paraphrases,
                                 "Paraphrases for TIL", "they were unfinished")
    paraphrases = filter_and_log(lambda p: not contains_offensive(p.readable_text()),
                                 paraphrases, "Paraphrases for TIL",
                                 "contained offensive phrase")
    if not paraphrases:
        raise CantRespondError(f"No good conv paraphrases for TIL \n {til_text}")
    if preferences:
        if preferences.statement_or_question:
            if preferences.statement_or_question == 'question':
                paraphrases = sorted(paraphrases,
                                     key=lambda p: did_you_know.execute(p.readable_text()),
                                     reverse=True)
            else:
                paraphrases = sorted(paraphrases,
                                     key=lambda p: not did_you_know.execute(p.readable_text()),
                                     reverse=True)
        if preferences.higher_unigram_recall:
            generations_for_other_tils = (
                state.entity_state[entity.name].conv_paraphrases[til_text]
                if til_text in state.entity_state[entity.name].conv_paraphrases else [])
            paraphrases = sorted(
                paraphrases,
                key=lambda p: self.ngram_recall(
                    [p.readable_text()] + generations_for_other_tils, til_text, 1),
                reverse=True)
        text = paraphrases[0].readable_text()
    else:
        text = random.choice([p.readable_text() for p in paraphrases])
    if text[-1] not in ['.', '!', '?']:
        text += '.'
    logger.primary_info(f'WIKI is responding with a *paraphrased* TIL to entity {entity.name}')
    logger.primary_info(f"TIL text: {til_text} \n ConvPara output: {text}")
    conditional_state = ConditionalState(
        cur_doc_title=entity.name,
        til_used=til_text,
        responding_treelet=self.__repr__(),
        prompt_handler=f"{self.__repr__()}:paraphrase_handler",
        paraphrase=(til_text, text))
    base_response_result = ResponseGeneratorResult(
        text=text,
        priority=ResponsePriority.CAN_START,
        cur_entity=entity,
        needs_prompt=False,
        state=state,
        conditional_state=conditional_state)
    return base_response_result
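
# did_you_know.execute is used above as a sort key, presumably returning a
# truthy value when a paraphrase is phrased as a "did you know" question. A
# regex sketch of that assumed behavior (the real helper may be a classifier):
import re

DID_YOU_KNOW_RE = re.compile(r'\bdid you know\b', re.IGNORECASE)

def did_you_know_execute(text: str) -> bool:
    return bool(DID_YOU_KNOW_RE.search(text))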
def label():
    with bz2.open(DUMP_PATH, 'rt') as f:
        while True:
            (entity, sentiment), tweets = literal_eval(f.readline())
            if ENTITIES_TO_WHITELIST is not None and entity not in ENTITIES_TO_WHITELIST:
                continue
            while entity not in {annotator_opinion.entity
                                 for annotator_opinion in annotator_opinions} \
                    and entity not in not_appropriate_entities:
                print(f'>> What is your sentiment on \033[92m{entity}\033[00m? '
                      f'(positive or negative or neutral), type "inapprop" if it is '
                      f'inappropriate, "exit" to exit')
                feedback = input('? ')
                if feedback in ('positive', 'negative', 'neutral'):
                    insert_annotator((entity, True, feedback), host_stream)
                    annotator_opinions.append(
                        AnnotatorOpinions('', ANNOTATOR, entity, True, feedback))
                elif feedback == 'inapprop':
                    insert_annotator((entity, False, None), host_stream)
                    not_appropriate_entities.add(entity)
                elif feedback == 'exit':
                    return
            if entity in not_appropriate_entities:
                print(f'>> Skipping \033[91m{entity}\033[00m because it is inappropriate')
                continue
            if done(entity, sentiment):
                print(f'>> Skipping \033[91m{entity}, {sentiment}\033[00m '
                      f'because we already have enough')
                continue
            print(f'>>>>>> Begin new phrase \033[91m{entity}, {sentiment}\033[00m')
            for tweet in tweets:
                if done(entity, sentiment):
                    break
                reason, _, attitude, _, tweet_id = tweet
                if reason in labeled_reasons:
                    continue
                if len(reason.split(' ')) < 5 or contains_offensive(reason):
                    continue
                feedback = ''
                while not ((feedback.isdigit() and 1 <= int(feedback) <= 4)
                           or feedback == 'exit'):
                    opposite_attitude = ('like' if attitude in ['hate', 'dislike', "don't like"]
                                         else "don't like")
                    good_counters = COUNTER[(entity, sentiment, 4)]
                    current_counters = sum(COUNTER[(entity, sentiment, fb)]
                                           for fb in range(1, 5))
                    print(f'> i \033[93m{opposite_attitude} \033[92m{entity}\033[00m but '
                          f'(i feel like, because) \033[96m{reason}\033[00m '
                          f'({good_counters}/{current_counters}/{len(tweets)})')
                    category_string = '; '.join(f'[{key}] {val}'
                                                for key, val in CATEGORIES.items())
                    print(f'Select from {category_string}, or "exit" to exit')
                    feedback = input('? ')
                    if feedback.isdigit() and 1 <= int(feedback) <= 4:
                        insert_opinion((entity, reason, attitude, sentiment,
                                        feedback, tweet_id), host_stream)
                        COUNTER[(entity, sentiment, int(feedback))] += 1
                    elif feedback == 'exit':
                        return
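
# done() gates both loops above. A plausible sketch, assuming it checks whether
# enough category-4 ("good") labels have been collected for the pair; both the
# quota and the check are assumptions, and COUNTER comes from the snippet above:
LABELS_NEEDED = 5  # hypothetical quota per (entity, sentiment) pair

def done(entity: str, sentiment: str) -> bool:
    return COUNTER[(entity, sentiment, 4)] >= LABELS_NEEDED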
def get_response(self, state: State) -> ResponseGeneratorResult:
    # Try to get name from utterance
    utterance = self.state_manager.current_state.text
    user_intent, user_name = self.get_name_from_utterance(utterance)  # str or None
    logger.primary_info(f"Detected UserIntent {user_intent}.")
    if user_name is not None and contains_offensive(
            user_name,
            'User name "{}" contains offensive phrase "{}", so acting like we didn\'t detect name.'):
        user_name = None
    if user_intent == UserIntent.yes or user_intent == UserIntent.yes_without_name:
        # If we got the name, save it and say intro phrase
        if user_name:
            setattr(self.state_manager.user_attributes, 'name', user_name)
            return ResponseGeneratorResult(
                text=random.choice(GREET_WITH_NAME).format(user_name),
                priority=ResponsePriority.STRONG_CONTINUE,
                needs_prompt=True,
                state=state,
                cur_entity=None,
                smooth_handoff=SmoothHandoff.LAUNCH_TO_NEURALCHAT,
                conditional_state=ConditionalState(None, user_intent=user_intent))
        # If we didn't get the name and we have not asked before, ask for name
        elif state.asked_name_counter == 0 or user_intent == UserIntent.yes_without_name:
            logger.primary_info('Was unable to detect name, but have not asked for name, '
                                'so asking for name again')
            return ResponseGeneratorResult(
                text=ASK_NAME_FIRST_TIME,
                priority=ResponsePriority.STRONG_CONTINUE,
                needs_prompt=False,
                state=state,
                cur_entity=None,
                conditional_state=ConditionalState(HandleNameTreelet.__name__,
                                                   user_intent=user_intent))
        # If we didn't get the name and we've asked once before, ask again
        elif state.asked_name_counter == 1:
            logger.primary_info('Was unable to detect name, so asking for name again')
            return ResponseGeneratorResult(
                text=ASK_NAME_AGAIN,
                priority=ResponsePriority.STRONG_CONTINUE,
                needs_prompt=False,
                state=state,
                cur_entity=None,
                conditional_state=ConditionalState(HandleNameTreelet.__name__,
                                                   user_intent=user_intent))
        # If we didn't get the name but we've already asked too many times,
        # greet without name and move on
        else:
            return ResponseGeneratorResult(
                text=random.choice(GREET_WITHOUT_NAME),
                priority=ResponsePriority.STRONG_CONTINUE,
                needs_prompt=True,
                state=state,
                cur_entity=None,
                smooth_handoff=SmoothHandoff.LAUNCH_TO_NEURALCHAT,
                conditional_state=ConditionalState(None, user_intent=user_intent))
    elif user_intent == UserIntent.no:
        return ResponseGeneratorResult(
            text=MOVING_ON,
            priority=ResponsePriority.STRONG_CONTINUE,
            needs_prompt=True,
            state=state,
            cur_entity=None,
            smooth_handoff=SmoothHandoff.LAUNCH_TO_NEURALCHAT,
            conditional_state=ConditionalState(None, user_intent=user_intent))
    elif user_intent == UserIntent.why:
        return ResponseGeneratorResult(
            text="Oh, I just want to get to know you! But if you'd prefer to stay "
                 "anonymous, that's no problem. So, do you mind telling me your name?",
            priority=ResponsePriority.STRONG_CONTINUE,
            needs_prompt=False,
            state=state,
            cur_entity=None,
            conditional_state=ConditionalState(HandleNameTreelet.__name__,
                                               user_intent=user_intent))
    elif user_intent == UserIntent.repeat:
        return ResponseGeneratorResult(
            text="Ok! What's your name?",
            priority=ResponsePriority.STRONG_CONTINUE,
            needs_prompt=False,
            state=state,
            cur_entity=None,
            conditional_state=ConditionalState(HandleNameTreelet.__name__,
                                               user_intent=user_intent))
    # If we didn't get the name but we've already asked too many times, greet without
    # name with needs_prompt=True, and smooth handoff to the next part of the launch sequence
    else:
        return ResponseGeneratorResult(
            text=random.choice(GREET_WITHOUT_NAME),
            priority=ResponsePriority.STRONG_CONTINUE,
            needs_prompt=True,
            state=state,
            cur_entity=None,
            smooth_handoff=SmoothHandoff.LAUNCH_TO_NEURALCHAT,
            conditional_state=ConditionalState(None))
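
# The UserIntent members referenced in get_response; a minimal reconstruction
# of the enum from the branches above (other members may exist in the real code):
from enum import Enum, auto

class UserIntent(Enum):
    yes = auto()
    yes_without_name = auto()
    no = auto()
    why = auto()
    repeat = auto()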
def test_removed_phrases(self):
    """
    Check that the offensive classifier doesn't recognize manually removed phrases.
    """
    self.assertFalse(contains_offensive("i love ginger cake"))
def test_added_phrases(self):
    """
    Check that the offensive classifier recognizes manually added offensive phrases.
    """
    self.assertTrue(contains_offensive("i'm watching pornhub"))
def test_stopwords_inoffensive(self):
    """
    Check that the offensive classifier doesn't classify any stopwords as
    offensive. This isn't a comprehensive check for false positives, but it
    checks the most common inoffensive words.
    """
    self.assertFalse(contains_offensive(' '.join(STOPWORDS)))
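
# STOPWORDS is not defined in this snippet; one plausible source is the
# standard NLTK English stopword list (an assumption about where it comes from):
from nltk.corpus import stopwords  # requires: nltk.download('stopwords')

STOPWORDS = set(stopwords.words('english'))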
def test_offensive(self, user_utterance):
    """
    Check that the offensive classifier recognizes offensive phrases, robust to
    case, singular/plural, punctuation, position in text, etc.
    """
    self.assertTrue(contains_offensive(user_utterance))
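
# test_offensive takes a user_utterance argument, so it is presumably driven by
# a parameterization mechanism. One way to wire it up, using the third-party
# `parameterized` package with masked example phrases (the codebase's actual
# mechanism and test cases are unknown):
from parameterized import parameterized

OFFENSIVE_TEST_CASES = [
    ("f**k this",),               # base case
    ("F**K THIS.",),              # case and punctuation variant
    ("well, that was f**ked",),   # inflection and mid-sentence variant
]

@parameterized.expand(OFFENSIVE_TEST_CASES)
def test_offensive_parameterized(self, user_utterance):
    self.assertTrue(contains_offensive(user_utterance))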