def _setup_editor(self): """ Sets up a `checklist.editor.Editor` object, to be used for adding default tests to the suite. """ if not hasattr(self, "editor"): self.editor = Editor()
def robustness_test():
    # Relies on `food_ret` and `drug_ret` being defined as globals by
    # `object_test()` before this function is called.
    editor = Editor()
    # food_ret = editor.template('How often do you get {food}?', food=food, labels=0, save=True)  # , nsamples=100)

    pdata = list(processor.pipe(food_ret.data))
    perturbed_punct = Perturb.perturb(pdata, Perturb.punctuation, keep_original=False)
    perturbed_typo = Perturb.perturb(food_ret.data, Perturb.add_typos, keep_original=False)
    inv_food_punct = INV(**perturbed_punct, name='Minor Changes: Punctuation',
                         capability='robustness', description='')
    inv_food_typo = INV(**perturbed_typo, name='Minor Changes: Typos',
                        capability='robustness', description='')

    pdata = list(processor.pipe(drug_ret.data))
    perturbed_punct = Perturb.perturb(pdata, Perturb.punctuation, keep_original=False)
    perturbed_typo = Perturb.perturb(drug_ret.data, Perturb.add_typos, keep_original=False)
    inv_drug_punct = INV(**perturbed_punct, name='Minor Changes: Punctuation',
                         capability='robustness', description='')
    inv_drug_typo = INV(**perturbed_typo, name='Minor Changes: Typos',
                        capability='robustness', description='')

    # Other perturbations that could be added here:
    # Perturb.contract / Perturb.expand_contractions / Perturb.contractions
    # Perturb.change_names / Perturb.change_location / Perturb.change_number

    tests = [inv_food_punct, inv_food_typo, inv_drug_punct, inv_drug_typo]
    names = ["inv_food_punct", "inv_food_typo", "inv_drug_punct", "inv_drug_typo"]
    for test, name in zip(tests, names):
        test.to_raw_file('./tests/' + name + '.txt')
    return tests, names
class TestTestTypes(TestCase):
    editor = Editor()
    dummy_test_data = editor.template(
        templates=["example 1", "example 2"],
        meta=False,
        labels=["label1", "label2"])
    test_id = "100"

    def test_mft_w_test_id(self):
        mft_test = MFT(**self.dummy_test_data,
                       expect=Expect.eq(),
                       name="mft test with test id",
                       test_id=self.test_id)
        assert mft_test.test_id == self.test_id

    def test_mft_wo_test_id(self):
        mft_test = MFT(**self.dummy_test_data,
                       expect=Expect.eq(),
                       name="mft test without test id")
        assert mft_test.test_id is None

    def test_dir_w_test_id(self):
        dir_test = DIR(**self.dummy_test_data,
                       expect=Expect.eq(),
                       name="dir test with test id",
                       test_id=self.test_id)
        assert dir_test.test_id == self.test_id

    def test_dir_wo_test_id(self):
        dir_test = DIR(**self.dummy_test_data,
                       expect=Expect.eq(),
                       name="dir test without test id")
        assert dir_test.test_id is None

    def test_inv_w_test_id(self):
        inv_test = INV(**self.dummy_test_data,
                       expect=Expect.eq(),
                       name="inv test with test id",
                       test_id=self.test_id)
        assert inv_test.test_id == self.test_id

    def test_inv_wo_test_id(self):
        inv_test = INV(**self.dummy_test_data,
                       expect=Expect.eq(),
                       name="inv test without test id")
        assert inv_test.test_id is None
def get_template(config):
    editor = Editor()
    entailment = config["entailment"]
    overlap = config["overlap"]
    if entailment and overlap:
        return [
            editor.template(
                {
                    'premise': '{first_name} is {adj1} but not {adj2}.',
                    'hypothesis': '{first_name} is {adj1}.'
                },
                labels=1,
                adj=ADJ,
                remove_duplicates=True),
            editor.template(
                {
                    'premise': '{first_name} is {adj1} but not {adj2}.',
                    'hypothesis': '{first_name} is not {adj2}.'
                },
                labels=1,
                adj=ADJ,
                remove_duplicates=True)
        ]
    elif overlap:
        return [
            editor.template(
                {
                    'premise': '{first_name} is {adj1} but not {adj2}.',
                    'hypothesis': '{first_name} is {adj2}.'
                },
                labels=0,
                adj=ADJ,
                remove_duplicates=True)
        ]
    else:
        return [
            editor.template(
                {
                    'premise': '{first_name} is {adj1} but not {adj2}.',
                    'hypothesis': '{first_name} is not {adj1}.'
                },
                labels=0,
                adj=ADJ,
                remove_duplicates=True)
        ]
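
# Illustrative call to get_template(). The ADJ list it references is defined
# elsewhere in the original module, so a small stand-in lexicon is declared
# here purely to make the sketch self-contained; checklist fills {adj1} and
# {adj2} from the same 'adj' keyword.
ADJ = ['happy', 'tall', 'tired']  # hypothetical stand-in for the real ADJ lexicon
nli_ret = get_template({"entailment": True, "overlap": True})
print(nli_ret[0].data[:3])  # premise/hypothesis pairs labelled as entailment (1)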
def generate_sents(template, words):
    editor = Editor()
    ret = editor.template(template, word=words)
    return ret.data
def generate_words(suggest_sentence):
    editor = Editor()
    words = editor.suggest(suggest_sentence)
    return words
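
# Example of chaining the two helpers above: ask the masked language model for
# candidate words, then expand them into sentences. The prompt and template
# strings here are illustrative only; the template must contain a {word} slot
# to match generate_sents().
candidate_words = generate_words('I really enjoyed the {mask} we had for dinner.')
example_sents = generate_sents('The {word} was delicious.', candidate_words[:10])
print(example_sents[:5])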
def make_suite():
    suite = TestSuite()
    editor = Editor()
#!/usr/bin/env python
# coding: utf-8

# # CheckList Playground
#
# Useful for coming up with a lot of words of some class (eg: transitive verb).

# In[1]:

import checklist
from checklist.editor import Editor

# In[2]:

editor = Editor()

# In[9]:

editor.suggest('The {mask} showed him his car.')
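
# In[ ]:

# Additional playground cell (illustrative): pointing `editor.suggest` at a
# verb slot is one way to collect candidate transitive verbs, the use case
# mentioned in the header above.
editor.suggest('The mechanic {mask} the car and returned it to the owner.')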
class SentimentAnalysisSuite(TaskSuite):
    """
    This suite was built using the checklist process with the self.editor
    suggestions. Users are encouraged to add/modify as they see fit.

    Note: `editor.suggest(...)` can be slow as it runs a language model.
    """

    def __init__(
        self,
        suite: Optional[TestSuite] = None,
        positive: Optional[int] = 0,
        negative: Optional[int] = 1,
        **kwargs,
    ):
        self._positive = positive
        self._negative = negative
        super().__init__(suite, **kwargs)

    @overrides
    def _prediction_and_confidence_scores(self, predictor):
        def preds_and_confs_fn(data):
            labels = []
            confs = []
            if isinstance(data[0], Instance):
                predictions = predictor.predict_batch_instance(data)
            else:
                data = [{"sentence": sentence} for sentence in data]
                predictions = predictor.predict_batch_json(data)
            for pred in predictions:
                label = pred["probs"].index(max(pred["probs"]))
                labels.append(label)
                confs.append([
                    pred["probs"][self._positive],
                    pred["probs"][self._negative]
                ])
            return np.array(labels), np.array(confs)

        return preds_and_confs_fn

    @overrides
    def _format_failing_examples(
        self,
        inputs: Tuple,
        pred: int,
        conf: Union[np.array, np.ndarray],
        label: Optional[int] = None,
        *args,
        **kwargs,
    ):
        """
        Formatting function for printing failed test examples.
        """
        labels = {self._positive: "Positive", self._negative: "Negative"}
        ret = str(inputs)
        if label is not None:
            ret += " (Original: %s)" % labels[label]
        ret += "\nPrediction: %s (Confidence: %.1f)" % (labels[pred], conf[pred])
        return ret

    @overrides
    def _default_tests(self, data: Optional[Iterable[str]], num_test_cases=100):
        super()._default_tests(data, num_test_cases)
        self._setup_editor()
        self._default_vocabulary_tests(data, num_test_cases)
        self._default_ner_tests(data, num_test_cases)
        self._default_temporal_tests(data, num_test_cases)
        self._default_fairness_tests(data, num_test_cases)
        self._default_negation_tests(data, num_test_cases)

    def _setup_editor(self):
        if not hasattr(self, "editor"):
            self.editor = Editor()

        pos_adj = [
            "good", "great", "excellent", "amazing", "extraordinary",
            "beautiful", "fantastic", "nice", "incredible", "exceptional",
            "awesome", "perfect", "fun", "adorable", "brilliant", "exciting",
            "sweet", "wonderful",
        ]
        neg_adj = [
            "awful", "bad", "horrible", "weird", "rough", "lousy", "unhappy",
            "average", "difficult", "poor", "sad", "frustrating", "hard",
            "lame", "nasty", "annoying", "boring", "creepy", "dreadful",
            "ridiculous", "terrible", "ugly", "unpleasant",
        ]
        self.editor.add_lexicon("pos_adj", pos_adj, overwrite=True)
        self.editor.add_lexicon("neg_adj", neg_adj, overwrite=True)

        pos_verb_present = [
            "like", "enjoy", "appreciate", "love", "recommend", "admire",
            "value", "welcome",
        ]
        neg_verb_present = ["hate", "dislike", "regret", "abhor", "dread", "despise"]
        pos_verb_past = [
            "liked", "enjoyed", "appreciated", "loved", "admired", "valued",
            "welcomed",
        ]
        neg_verb_past = ["hated", "disliked", "regretted", "abhorred", "dreaded", "despised"]
        self.editor.add_lexicon("pos_verb_present", pos_verb_present, overwrite=True)
        self.editor.add_lexicon("neg_verb_present", neg_verb_present, overwrite=True)
        self.editor.add_lexicon("pos_verb_past", pos_verb_past, overwrite=True)
        self.editor.add_lexicon("neg_verb_past", neg_verb_past, overwrite=True)
        self.editor.add_lexicon("pos_verb", pos_verb_present + pos_verb_past, overwrite=True)
        self.editor.add_lexicon("neg_verb", neg_verb_present + neg_verb_past, overwrite=True)

        noun = [
            "airline", "movie", "product", "customer service", "restaurant",
            "hotel", "food", "staff", "company", "crew", "service",
        ]
self.editor.add_lexicon("noun", noun, overwrite=True) intens_adj = [ "very", "really", "absolutely", "truly", "extremely", "quite", "incredibly", "amazingly", "especially", "exceptionally", "unbelievably", "utterly", "exceedingly", "rather", "totally", "particularly", ] intens_verb = [ "really", "absolutely", "truly", "extremely", "especially", "utterly", "totally", "particularly", "highly", "definitely", "certainly", "genuinely", "honestly", "strongly", "sure", "sincerely", ] self.editor.add_lexicon("intens_adj", intens_adj, overwrite=True) self.editor.add_lexicon("intens_verb", intens_verb, overwrite=True) reducer_adj = [ "somewhat", "kinda", "mostly", "probably", "generally", "reasonably", "a little", "a bit", "slightly", ] self.editor.add_lexicon("reducer_adj", reducer_adj, overwrite=True) self.monotonic_label = Expect.monotonic(increasing=True, tolerance=0.1) self.monotonic_label_down = Expect.monotonic(increasing=False, tolerance=0.1) def _default_vocabulary_tests(self, data: Optional[Iterable[str]], num_test_cases=100): positive_words = (self.editor.lexicons["pos_adj"] + self.editor.lexicons["pos_verb_present"] + self.editor.lexicons["pos_verb_past"]) test = MFT( positive_words, labels=self._positive, name="Single Positive Words", capability="Vocabulary", description="Correctly recognizes positive words", ) self.add_test(test) negative_words = (self.editor.lexicons["neg_adj"] + self.editor.lexicons["neg_verb_present"] + self.editor.lexicons["neg_verb_past"]) test = MFT( negative_words, labels=self._negative, name="Single Negative Words", capability="Vocabulary", description="Correctly recognizes negative words", ) self.add_test(test) template = self.editor.template( "{it} {noun} {be} {pos_adj}.", it=["The", "This", "That"], be=["is", "was"], labels=self._positive, save=True, ) template += self.editor.template( "{it} {be} {a:pos_adj} {noun}.", it=["It", "This", "That"], be=["is", "was"], labels=self._positive, save=True, ) template += self.editor.template( "{i} {pos_verb} {the} {noun}.", i=["I", "We"], the=["this", "that", "the"], labels=self._positive, save=True, ) template += self.editor.template( "{it} {noun} {be} {neg_adj}.", it=["That", "This", "The"], be=["is", "was"], labels=self._negative, save=True, ) template += self.editor.template( "{it} {be} {a:neg_adj} {noun}.", it=["It", "This", "That"], be=["is", "was"], labels=self._negative, save=True, ) template += self.editor.template( "{i} {neg_verb} {the} {noun}.", i=["I", "We"], the=["this", "that", "the"], labels=self._negative, save=True, ) test = MFT( **template, name="Sentiment-laden words in context", capability="Vocabulary", description="Use positive and negative verbs and adjectives " "with nouns such as product, movie, airline, etc. " 'E.g. "This was a bad movie"', ) self.add_test(test) template = self.editor.template( [ "{it} {be} {a:pos_adj} {noun}.", "{it} {be} {a:intens_adj} {pos_adj} {noun}." ], it=["It", "This", "That"], be=["is", "was"], nsamples=num_test_cases, save=True, ) template += self.editor.template( [ "{i} {pos_verb} {the} {noun}.", "{i} {intens_verb} {pos_verb} {the} {noun}." ], i=["I", "We"], the=["this", "that", "the"], nsamples=num_test_cases, save=True, ) template += self.editor.template( [ "{it} {be} {a:neg_adj} {noun}.", "{it} {be} {a:intens_adj} {neg_adj} {noun}." ], it=["It", "This", "That"], be=["is", "was"], nsamples=num_test_cases, save=True, ) template += self.editor.template( [ "{i} {neg_verb} {the} {noun}.", "{i} {intens_verb} {neg_verb} {the} {noun}." 
], i=["I", "We"], the=["this", "that", "the"], nsamples=num_test_cases, save=True, ) test = DIR( template.data, self.monotonic_label, templates=template.templates, name="Intensifiers", capability="Vocabulary", description= "Test is composed of pairs of sentences (x1, x2), where we add an intensifier" "such as 'really',or 'very' to x2 and expect the confidence to NOT go down " "(with tolerance=0.1). e.g.:" "x1 = 'That was a good movie'" "x2 = 'That was a very good movie'", ) self.add_test(test) template = self.editor.template( [ "{it} {noun} {be} {pos_adj}.", "{it} {noun} {be} {reducer_adj} {pos_adj}." ], it=["The", "This", "That"], be=["is", "was"], nsamples=num_test_cases, save=True, ) template += self.editor.template( [ "{it} {noun} {be} {neg_adj}.", "{it} {noun} {be} {reducer_adj} {neg_adj}." ], it=["The", "This", "That"], be=["is", "was"], nsamples=num_test_cases, save=True, ) test = DIR( template.data, self.monotonic_label_down, templates=template.templates, name="Reducers", capability="Vocabulary", description= "Test is composed of pairs of sentences (x1, x2), where we add a reducer" "such as 'somewhat', or 'kinda' to x2 and expect the confidence to NOT go up " " (with tolerance=0.1). e.g.:" "x1 = 'The staff was good.'" "x2 = 'The staff was somewhat good.'", ) self.add_test(test) if data: positive = self.editor.template("I {pos_verb_present} you.").data positive += self.editor.template("You are {pos_adj}.").data negative = self.editor.template("I {neg_verb_present} you.").data negative += self.editor.template("You are {neg_adj}.").data template = Perturb.perturb(data, _add_phrase_function(positive), nsamples=num_test_cases) test = DIR( template.data, Expect.pairwise(self._diff_up), name="Add positive phrases", capability="Vocabulary", description= "Add very positive phrases (e.g. I love you) to the end of sentences, " "expect probability of positive to NOT go down (tolerance=0.1)", ) self.add_test(test) template = Perturb.perturb(data, _add_phrase_function(negative), nsamples=num_test_cases) test = DIR( template.data, Expect.pairwise(self._diff_down), name="Add negative phrases", capability="Vocabulary", description= "Add very negative phrases (e.g. 
                "Add very negative phrases (e.g. I hate you) to the end of sentences, "
                "expect probability of positive to NOT go up (tolerance=0.1)",
            )
            self.add_test(test)

    def _default_robustness_tests(self, data: Optional[Iterable[str]], num_test_cases=100):
        template = Perturb.perturb(data, utils.add_random_strings, nsamples=num_test_cases)
        test = INV(
            template.data,
            name="Add random urls and handles",
            capability="Robustness",
            description="Add randomly generated urls and handles to the start or end of sentence",
        )
        self.add_test(test)

    def _default_ner_tests(self, data: Optional[Iterable[str]], num_test_cases=100):
        if data:
            template = Perturb.perturb(data,
                                       utils.spacy_wrap(Perturb.change_names, ner=True),
                                       nsamples=num_test_cases)
            test = INV(
                template.data,
                name="Change names",
                capability="NER",
                description="Replace names with other common names",
            )
            self.add_test(test)

            template = Perturb.perturb(data,
                                       utils.spacy_wrap(Perturb.change_location, ner=True),
                                       nsamples=num_test_cases)
            test = INV(
                template.data,
                name="Change locations",
                capability="NER",
                description="Replace city or country names with other cities or countries",
            )
            self.add_test(test)

            template = Perturb.perturb(data,
                                       utils.spacy_wrap(Perturb.change_number, ner=True),
                                       nsamples=num_test_cases)
            test = INV(
                template.data,
                name="Change numbers",
                capability="NER",
                description="Replace integers with random integers within a 20% radius of the original",
            )
            self.add_test(test)

    def _default_temporal_tests(self, data: Optional[Iterable[str]], num_test_cases=100):
        self._setup_editor()

        change = ["but", "even though", "although", ""]
        template = self.editor.template(
            [
                "I used to think this {noun} was {neg_adj}, {change} now I think it is {pos_adj}.",
                "I think this {noun} is {pos_adj}, {change} I used to think it was {neg_adj}.",
                "In the past I thought this {noun} was {neg_adj}, {change} now I think it is {pos_adj}.",
                "I think this {noun} is {pos_adj}, {change} in the past I thought it was {neg_adj}.",
            ],
            change=change,
            unroll=True,
            nsamples=num_test_cases,
            save=True,
            labels=self._positive,
        )
        template += self.editor.template(
            [
                "I used to {neg_verb_present} this {noun}, {change} now I {pos_verb_present} it.",
                "I {pos_verb_present} this {noun}, {change} I used to {neg_verb_present} it.",
                "In the past I would {neg_verb_present} this {noun}, {change} now I {pos_verb} it.",
                "I {pos_verb_present} this {noun}, {change} in the past I would {neg_verb_present} it.",
            ],
            change=change,
            unroll=True,
            nsamples=num_test_cases,
            save=True,
            labels=self._positive,
        )
        template += self.editor.template(
            [
                "I used to think this {noun} was {pos_adj}, {change} now I think it is {neg_adj}.",
                "I think this {noun} is {neg_adj}, {change} I used to think it was {pos_adj}.",
                "In the past I thought this {noun} was {pos_adj}, {change} now I think it is {neg_adj}.",
                "I think this {noun} is {neg_adj}, {change} in the past I thought it was {pos_adj}.",
            ],
            change=change,
            unroll=True,
            nsamples=num_test_cases,
            save=True,
            labels=self._negative,
        )
        template += self.editor.template(
            [
                "I used to {pos_verb_present} this {noun}, {change} now I {neg_verb_present} it.",
                "I {neg_verb_present} this {noun}, {change} I used to {pos_verb_present} it.",
                "In the past I would {pos_verb_present} this {noun}, {change} now I {neg_verb_present} it.",
                "I {neg_verb_present} this {noun}, {change} in the past I would {pos_verb_present} it.",
            ],
            change=change,
            unroll=True,
            nsamples=num_test_cases,
            save=True,
            labels=self._negative,
        )
        test = MFT(
            **template,
            name="Used to, but now",
            capability="Temporal",
about the past and " "one about the present." "Expect the present to carry the sentiment. Examples:" "I used to love this airline, now I hate it -> should be negative" "I love this airline, although I used to hate it -> should be positive", ) self.add_test(test) adjectives = self.editor.lexicons["pos_adj"] + self.editor.lexicons[ "neg_adj"] verbs = self.editor.lexicons[ "pos_verb_present"] + self.editor.lexicons["neg_verb_present"] template = self.editor.template( [ "{it} {be} {a:adj} {noun}.", "I used to think {it} {be} {a:adj} {noun}." ], it=["it", "this", "that"], be=["is", "was"], adj=adjectives, save=True, nsamples=num_test_cases, ) template += self.editor.template( ["{i} {verb} {the} {noun}.", "{i} used to {verb} {the} {noun}."], i=["I", "We"], the=["this", "that", "the"], verb=verbs, save=True, nsamples=num_test_cases, ) test = DIR( template.data, self.monotonic_label_down, templates=template.templates, name="'Used to' should reduce", capability="Temporal", description= "A model should not be more confident on 'I used to think X' " "when compared to 'X', e.g. 'I used to love this restaurant' " "should have less confidence than 'I love this restaurant'", ) self.add_test(test) def _default_fairness_tests(self, data: Optional[Iterable[str]], num_test_cases=100): protected = { "race": ["a black", "a hispanic", "a white", "an asian"], # add more here. "sexuality": self.editor.template("{a:sexual_adj}").data, "religion": self.editor.template("{a:religion_adj}").data, "nationality": self.editor.template("{a:nationality}").data, } for p, vals in protected.items(): template = self.editor.template( ["{male} is %s {mask}." % r for r in vals], return_maps=False, nsamples=num_test_cases, save=True, ) template += self.editor.template( ["{female} is %s {mask}." 
                return_maps=False,
                nsamples=num_test_cases,
                save=True,
            )
            test = INV(
                template.data,
                threshold=0.1,
                templates=template.templates,
                name="Protected: %s" % p,
                capability="Fairness",
                description="Prediction should be the same for various adjectives within a protected class",
            )
            self.add_test(test)

    def _default_negation_tests(self, data: Optional[Iterable[str]], num_test_cases=100):
        template = self.editor.template(
            "{it} {noun} {nt} {pos_adj}.",
            it=["This", "That", "The"],
            nt=["is not", "isn't"],
            save=True,
            nsamples=num_test_cases,
        )
        template += self.editor.template(
            "{it} {benot} {a:pos_adj} {noun}.",
            it=["It", "This", "That"],
            benot=["is not", "isn't", "was not", "wasn't"],
            save=True,
            nsamples=num_test_cases,
        )
        neg = [
            "I can't say I", "I don't", "I would never say I",
            "I don't think I", "I didn't"
        ]
        template += self.editor.template(
            "{neg} {pos_verb_present} {the} {noun}.",
            neg=neg,
            the=["this", "that", "the"],
            save=True,
            nsamples=num_test_cases,
        )
        template += self.editor.template(
            "No one {pos_verb_present}s {the} {noun}.",
            neg=neg,
            the=["this", "that", "the"],
            save=True,
            nsamples=num_test_cases,
        )
        test = MFT(
            template.data,
            labels=self._negative,
            templates=template.templates,
            name="Simple negations: negative",
            capability="Negation",
            description="Very simple negations of positive statements",
        )
        self.add_test(test)

        template = self.editor.template(
            "I thought {it} {noun} would be {pos_adj}, but it {neg}.",
            neg=["was not", "wasn't"],
            it=["this", "that", "the"],
            nt=["is not", "isn't"],
            save=True,
            nsamples=num_test_cases,
        )
        template += self.editor.template(
            "I thought I would {pos_verb_present} {the} {noun}, but I {neg}.",
            neg=["did not", "didn't"],
            the=["this", "that", "the"],
            save=True,
            nsamples=num_test_cases,
        )
        test = MFT(
            template.data,
            labels=self._negative,
            templates=template.templates,
            name="Simple negations: I thought x was positive, but it was not",
            capability="Negation",
            description="",
        )
        self.add_test(test)

    def _positive_change(self, orig_conf: np.ndarray, conf: np.ndarray) -> float:
        """
        Returns the change in the confidence scores.
        """
        return (orig_conf[self._negative] - conf[self._negative] +
                conf[self._positive] - orig_conf[self._positive])

    def _diff_up(
        self,
        orig_pred: int,
        pred: int,
        orig_conf: np.ndarray,
        conf: np.ndarray,
        labels: Optional[int] = None,
        meta: Optional[List] = None,
    ) -> Union[bool, float]:
        """
        These arguments are expected by the `checklist.expect.Expect.pairwise`
        function. We only use `orig_conf` and `conf` in this case.

        `orig_conf` is the confidence score of the first example in a test's
        input data pair.

        A `bool` output indicates whether the test passed the expectation
        (always `True` in this function's case). A `float` output indicates
        the magnitude of the failure.
        """
        tolerance = 0.1
        change = self._positive_change(orig_conf, conf)
        if change + tolerance >= 0:
            return True
        else:
            return change + tolerance

    def _diff_down(
        self,
        orig_pred: int,
        pred: int,
        orig_conf: np.ndarray,
        conf: np.ndarray,
        labels: Optional[int] = None,
        meta: Optional[List] = None,
    ) -> Union[bool, float]:
        """
        These arguments are expected by the `checklist.expect.Expect.pairwise`
        function. We only use `orig_conf` and `conf` in this case.

        `orig_conf` is the confidence score of the first example in a test's
        input data pair.

        A `bool` output indicates whether the test passed the expectation
        (always `True` in this function's case). A `float` output indicates
        the magnitude of the failure.
""" tolerance = 0.1 change = self._positive_change(orig_conf, conf) if change - tolerance <= 0: return True else: return -(change - tolerance)
def _setup_editor(self):
    if not hasattr(self, "editor"):
        self.editor = Editor()

    pos_adj = [
        "good", "great", "excellent", "amazing", "extraordinary", "beautiful",
        "fantastic", "nice", "incredible", "exceptional", "awesome",
        "perfect", "fun", "adorable", "brilliant", "exciting", "sweet",
        "wonderful",
    ]
    neg_adj = [
        "awful", "bad", "horrible", "weird", "rough", "lousy", "unhappy",
        "average", "difficult", "poor", "sad", "frustrating", "hard", "lame",
        "nasty", "annoying", "boring", "creepy", "dreadful", "ridiculous",
        "terrible", "ugly", "unpleasant",
    ]
    self.editor.add_lexicon("pos_adj", pos_adj, overwrite=True)
    self.editor.add_lexicon("neg_adj", neg_adj, overwrite=True)

    pos_verb_present = [
        "like", "enjoy", "appreciate", "love", "recommend", "admire", "value",
        "welcome",
    ]
    neg_verb_present = ["hate", "dislike", "regret", "abhor", "dread", "despise"]
    pos_verb_past = [
        "liked", "enjoyed", "appreciated", "loved", "admired", "valued",
        "welcomed",
    ]
    neg_verb_past = ["hated", "disliked", "regretted", "abhorred", "dreaded", "despised"]
    self.editor.add_lexicon("pos_verb_present", pos_verb_present, overwrite=True)
    self.editor.add_lexicon("neg_verb_present", neg_verb_present, overwrite=True)
    self.editor.add_lexicon("pos_verb_past", pos_verb_past, overwrite=True)
    self.editor.add_lexicon("neg_verb_past", neg_verb_past, overwrite=True)
    self.editor.add_lexicon("pos_verb", pos_verb_present + pos_verb_past, overwrite=True)
    self.editor.add_lexicon("neg_verb", neg_verb_present + neg_verb_past, overwrite=True)

    noun = [
        "airline", "movie", "product", "customer service", "restaurant",
        "hotel", "food", "staff", "company", "crew", "service",
    ]
    self.editor.add_lexicon("noun", noun, overwrite=True)

    intens_adj = [
        "very", "really", "absolutely", "truly", "extremely", "quite",
        "incredibly", "amazingly", "especially", "exceptionally",
        "unbelievably", "utterly", "exceedingly", "rather", "totally",
        "particularly",
    ]
    intens_verb = [
        "really", "absolutely", "truly", "extremely", "especially", "utterly",
        "totally", "particularly", "highly", "definitely", "certainly",
        "genuinely", "honestly", "strongly", "sure", "sincerely",
    ]
    self.editor.add_lexicon("intens_adj", intens_adj, overwrite=True)
    self.editor.add_lexicon("intens_verb", intens_verb, overwrite=True)

    reducer_adj = [
        "somewhat", "kinda", "mostly", "probably", "generally", "reasonably",
        "a little", "a bit", "slightly",
    ]
    self.editor.add_lexicon("reducer_adj", reducer_adj, overwrite=True)

    self.monotonic_label = Expect.monotonic(increasing=True, tolerance=0.1)
    self.monotonic_label_down = Expect.monotonic(increasing=False, tolerance=0.1)
def add_common_lexicons(editor: Editor):
    """
    Add commonly used lexicons to the editor object. These can be used in all
    the task suites.

    Note: Updates the `editor` object in place.
    """
    profession = [
        "journalist", "historian", "secretary", "nurse", "waitress",
        "accountant", "engineer", "attorney", "artist", "editor", "architect",
        "model", "interpreter", "analyst", "actor", "actress", "assistant",
        "intern", "economist", "organizer", "author", "investigator", "agent",
        "administrator", "executive", "educator", "investor", "DJ",
        "entrepreneur", "auditor", "advisor", "instructor", "activist",
        "consultant", "apprentice", "reporter", "expert", "psychologist",
        "examiner", "painter", "manager", "contractor", "therapist",
        "programmer", "musician", "producer", "associate", "intermediary",
        "designer", "cook", "salesperson", "dentist", "attorney", "detective",
        "banker", "researcher", "cop", "driver", "counselor", "clerk",
        "professor", "tutor", "coach", "chemist", "scientist", "veterinarian",
        "firefighter", "baker", "psychiatrist", "prosecutor", "director",
        "technician",
    ]

    editor.add_lexicon("profession", profession, overwrite=True)
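
# Example use of the helper above. {first_name} and the {a:...} article syntax
# are built into checklist's Editor; the template sentence itself is just an
# illustration.
editor = Editor()
add_common_lexicons(editor)
ret = editor.template('{first_name} works as {a:profession}.', nsamples=5)
print(ret.data)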
import re


def replace_john_with_others(x, *args, **kwargs):
    # Returns None (if John is not present) or a list of strings with John
    # replaced by Luke and Mark.
    if not re.search(r'\bJohn\b', x):
        return None
    return [re.sub(r'\bJohn\b', n, x) for n in ['Luke', 'Mark']]

dataset = ['John is a man', 'Mary is a woman', 'John is an apostle']
ret = Perturb.perturb(dataset, replace_john_with_others)
ret.data

# In[12]:

import checklist
from checklist.editor import Editor
from checklist.perturb import Perturb
from checklist.test_types import MFT, INV, DIR

editor = Editor()
t = editor.template('This is {a:adj} {mask}.', adj=['good', 'great', 'excellent', 'awesome'])
test1 = MFT(t.data, labels=1, name='Simple positives', capability='Vocabulary', description='')

# In[ ]:
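
# Illustrative follow-up cell: run the MFT defined above against a stand-in
# model. The constant-probability predictor is a placeholder; any function
# mapping a list of strings to an (n, 2) softmax array would work with
# checklist's PredictorWrapper.wrap_softmax.
import numpy as np
from checklist.pred_wrapper import PredictorWrapper

def placeholder_softmax(inputs):
    p1 = np.array([0.9 for _ in inputs]).reshape(-1, 1)  # always fairly "positive"
    return np.hstack((1 - p1, p1))

wrapped = PredictorWrapper.wrap_softmax(placeholder_softmax)
test1.run(wrapped)
test1.summary()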
#!/usr/bin/env python
# coding: utf-8

# In[1]:

import checklist
from checklist.editor import Editor
from checklist.perturb import Perturb

# In[4]:

# VOCAB+POS -- MFT
# Template: 店家服务是{adj}的 ("The shop's service is {adj}."); the adjective list
# mixes positive, negative, and idiomatic Chinese expressions.
editor = Editor(language='chinese')
con = '店家服务是{adj}的'
ret = editor.template(con, adj=['非常好', '不错', '垃圾的', '无与伦比的', '差劲', '优美的',
                                '鸟语花香', '百花齐放', '繁花似锦', '桃红柳绿', '春色满园',
                                '春意盎然', '喜上眉梢', '兴高采烈', '眉飞色舞', '喜笑颜开',
                                '欣喜若狂', '心花怒放', '相当好'])
ret.data

# In[11]:

# VOCAB+POS -- INV
# Template: 这次又买了一个{adj} ("This time I bought another {adj}"), filled with
# product and brand names.
editor = Editor(language='chinese')
con = '这次又买了一个{adj}'
ret = editor.template(con, adj=['小米10', '小天鹅', '华为mate40', '苹果12', '锤子', '魅族',
                                '三星', '奔驰', '宝马', '劳力士', '酷派', '菲仕乐', 'WMF',
                                '康宁', '苏泊尔', '健力宝', '可口可乐', '滴滴', '回力', 'LV'])
ret.data
#!/usr/bin/env python
# coding: utf-8

# In[1]:

import checklist
from checklist.editor import Editor
from checklist.perturb import Perturb

# In[2]:

editor = Editor(language='chinese')

# In[6]:

# Template: 这是一个{adj}电影 ("This is a {adj} movie.")
ret = editor.template('这是一个{adj}电影', adj=['好看的', '不好看的', '垃圾的', '无与伦比的', '新奇的', '优美的'])
ret.data

# In[15]:

# Template: {male}是小米人吗 ("Is {male} a Xiaomi person?")
ret = editor.template('{male}是小米人吗', remove_duplicates=True)
ret.data[0:10]

# In[17]:

editor.add_lexicon('adj', ['good', 'bad', 'great', 'terrible'])

# In[18]:

# The original cell is truncated after the opening parenthesis; it is completed
# minimally here (both {adj} and {adj2} draw on the 'adj' lexicon added above).
ret = editor.template('{adj} is not the same as {adj2}', remove_duplicates=True)
ret.data
def object_test():
    global food_ret, sport_ret, drug_ret, nondrug_ret
    """
    codeDictionary = {"D":0, "M":1, "S":2, "H":3, "F":4, "O":5, "E":6, "NA":7}
    """
    editor = Editor()

    food_ret1 = editor.template('How often do you get {food}?',
                                food=food, labels=0, save=True)  # , nsamples=100)
    food_ret2 = editor.template('I can\'t stop thinking about {food}!',
                                food=food, labels=0, save=True)  # , nsamples=100)
    food_ret = food_ret1 + food_ret2
    mft_food = MFT(food_ret.data, labels=food_ret.labels,
                   name='Object Rec: Food', capability='Objects',
                   description='Food')

    sport_ret1 = editor.template('I have to participate in {sport}?',
                                 sport=sport, labels=6, save=True)  # , nsamples=100)
    sport_ret2 = editor.template('It is good to move your body, like doing {sport}.',
                                 sport=sport, labels=6, save=True)  # , nsamples=100)
    sport_ret = sport_ret1 + sport_ret2
    mft_sport = MFT(sport_ret.data, labels=sport_ret.labels,
                    name='Object Rec: Sport', capability='Objects',
                    description='Sport')

    nondrug_ret1 = editor.template('How often do you take {nondrug}?',
                                   nondrug=nondrug, labels=5)  # , save=True)  # , nsamples=100)
    nondrug_ret2 = editor.template('Have you taken {nondrug} for the last five months?',
                                   nondrug=nondrug, labels=5)  # , save=True)  # , nsamples=100)
    nondrug_ret = nondrug_ret1 + nondrug_ret2
    mft_nondrug = MFT(nondrug_ret.data, labels=nondrug_ret.labels,
                      name='Object Rec: Non Drug', capability='Objects',
                      description='Non Drug')

    drug_ret1 = editor.template('How often do you get {drug}?',
                                drug=drug, labels=1, save=True)  # , nsamples=100)
    drug_ret2 = editor.template('Have you taken {drug} for the last five months?',
                                drug=drug, labels=1, save=True)  # , nsamples=100)
    drug_ret = drug_ret1 + drug_ret2
    mft_drug = MFT(drug_ret.data, labels=drug_ret.labels,
                   name='Object Rec: Drug', capability='Objects',
                   description='Drug')

    # print(nondrug_ret.data)
    nt = Perturb.perturb(nondrug_ret.data, swap_nondrug)
    inv_n = INV(**nt, name='swap nondrug name in both questions',
                capability='objects', description='')
    # print(len(nt.data))
    # exit()
    """
    import numpy as np

    def pp(inputs):
        p1 = np.array([0.5 for x in inputs]).reshape(-1, 1)
        p0 = 1 - p1
        return np.hstack((p0, p1))

    from checklist.pred_wrapper import PredictorWrapper
    wrapped = PredictorWrapper.wrap_softmax(pp)
    inv_n.run(wrapped)
    """
    dt = Perturb.perturb(drug_ret.data, swap_drug)
    inv_d = INV(**dt, name='swap drug name in both questions',
                capability='objects', description='')

    nondrug_monodec = Expect.monotonic(label=5, increasing=False, tolerance=0.1)
    drug_monodec = Expect.monotonic(label=1, increasing=False, tolerance=0.1)
    ndt = Perturb.perturb(nondrug_ret.data, swap_nd)
    dir_nd = DIR(**ndt, expect=nondrug_monodec)
    dnt = Perturb.perturb(drug_ret.data, swap_dn)
    dir_dn = DIR(**dnt, expect=drug_monodec)

    # Label codes follow codeDictionary above (diet, exercise, other, medical, ...);
    # swap_nd / swap_dn appear to swap other -> medical and medical -> other.
    tests = [mft_food, mft_sport, mft_nondrug, mft_drug]  # , inv_n, inv_d, dir_nd, dir_dn]
    names = [x.strip(",") for x in
             "mft_food, mft_sport, mft_nondrug, mft_drug".split()]
    # , inv_n, inv_d, dir_nd, dir_dn".split()]
    assert len(tests) == len(names)
    for test, name in zip(tests, names):
        test.to_raw_file('./tests/' + name + '.txt')
    return tests, names
def main():
    editor = Editor()

    adjpos = """ great fun lovely sexy wonderful amazing awesome good fantastic excelent masterful fabulous incredible salacious"""
    adjpos = adjpos.strip().split()
    adjneg = """terrible horrible shitty worst disasterous bad abominable atrocious crappy repulsive yucky irredeemable"""
    adjneg = adjneg.strip().split()

    rev_neg_pos = "some parts are {pos} but overall it is {neg}."
    rev_pos_neg = "some parts are {neg} but overall it is {pos}."

    final_data = []
    id_counter = 0

    out_0 = editor.template(rev_neg_pos, pos=adjpos, neg=adjneg)
    # print(out_0)
    out_1 = editor.template(rev_pos_neg, pos=adjpos, neg=adjneg)
    # print(out_1)

    seed = 'I had a {neg} day really {neg} but the album made it {pos}'
    ous_0 = editor.template(seed, pos=adjpos, neg=adjneg)
    # print(ous_0)
    seed = 'I had a {pos} day really {pos} but the album made it {neg}'
    ous_1 = editor.template(seed, pos=adjpos, neg=adjneg)
    # print(ous_1)

    with open('../data/music_reviews_dev.json', 'r') as f:
        data = []
        for line in f.readlines():
            data.append(json.loads(line))

    data = random.sample(data, 100)
    tmp = []
    tmp_labels = []
    for d in data:
        tmp.append(d.get('reviewText'))
        tmp_labels.append(d.get('sentiment'))

    baseline = "{male} is my {religion_adj}, {sexual_adj} friend, and he showed me this song. I found it {pos}"
    ret_pos = editor.template(baseline, pos=adjpos)
    ret_neg = editor.template(baseline, pos=adjneg)

    # tmp already holds exactly 100 sampled reviews, so iterate in order to keep
    # each typo'd review aligned with its original sentiment label.
    for idx, entry in enumerate(
            Perturb.perturb(tmp, Perturb.add_typos, keep_original=False).data):
        final_data.append(to_json(entry[0], tmp_labels[idx], id_counter))
        id_counter += 1
    for entry in random.sample(out_0.data, 100):
        final_data.append(to_json(entry, "negative", id_counter))
        id_counter += 1
    for entry in random.sample(out_1.data, 100):
        final_data.append(to_json(entry, "positive", id_counter))
        id_counter += 1
    for entry in random.sample(ous_0.data, 100):
        final_data.append(to_json(entry, "positive", id_counter))
        id_counter += 1
    for entry in random.sample(ous_1.data, 100):
        final_data.append(to_json(entry, "negative", id_counter))
        id_counter += 1
    for entry in random.sample(ret_pos.data, 100):
        final_data.append(to_json(entry, "positive", id_counter))
        id_counter += 1
    for entry in random.sample(ret_neg.data, 100):
        final_data.append(to_json(entry, "negative", id_counter))
        id_counter += 1

    with open('../data/music_reviews_yucky.json', 'w') as file:
        for line in final_data:
            file.write(json.dumps(line))
            file.write("\n")
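
# Quick sanity check (illustrative, not part of the original script): the file
# written above holds one JSON object per line, so it can be read back the same
# way the dev set is loaded in main().
import json

with open('../data/music_reviews_yucky.json', 'r') as f:
    generated = [json.loads(line) for line in f]
print(len(generated))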
""" Script to generate pairs of sentences in direct / indirect object alternation, using CheckList and RoBERTa. """ import checklist from checklist.editor import Editor import pickle VERBS = ['gave', 'sent', 'mailed', 'brought', 'showed', 'sold'] editor = Editor() sentences = set() for vb in VERBS: # Only use the first 3 masked words. The second sentence seems to make it generate # sentences that are equally likely in both syntactic positions. ret = editor.suggest( f'The {{mask}} {vb} the {{mask}} a {{mask}}. The {{mask}} {vb} a {{mask}} to the {{mask}}.', ) for t in ret: subj = t[0] iobj = t[1] dobj = t[2] if subj == iobj or iobj == dobj or subj == dobj: continue sent1 = f'The {subj} {vb} the {iobj} a {dobj}.' sent2 = f'The {subj} {vb} a {dobj} to the {iobj}.' sentences.add((sent1, sent2)) for sent1, sent2 in sentences: