示例#1
0
 def _setup_editor(self):
     """
     Sets up a `checklist.editor.Editor` object, to be used for adding
     default tests to the suite.
     """
     if not hasattr(self, "editor"):
         self.editor = Editor()
示例#2
0
def robustness_test():
    """
    Build punctuation and typo invariance (INV) tests for the food and drug
    data sets and write each test to ``./tests/<name>.txt``.

    This function reads ``food_ret``, ``drug_ret`` and ``processor`` without
    defining them (the line that would have built ``food_ret`` locally was
    commented out), so they are presumably module-level globals -- TODO
    confirm they are defined before this is called.

    Returns:
        A ``(tests, names)`` pair: the four INV tests in the order
        food/punctuation, food/typos, drug/punctuation, drug/typos, and the
        matching file stems used for the raw files.
    """
    import os

    def build_pair(ret):
        # Perturb.punctuation is given the output of processor.pipe, whereas
        # Perturb.add_typos is given the raw data.
        parsed = list(processor.pipe(ret.data))
        punct = Perturb.perturb(parsed,
                                Perturb.punctuation,
                                keep_original=False)
        typo = Perturb.perturb(ret.data,
                               Perturb.add_typos,
                               keep_original=False)
        return (
            INV(**punct,
                name='Minor Changes: Punctuation',
                capability='robustness',
                description=''),
            INV(**typo,
                name='Minor Changes: Typos',
                capability='robustness',
                description=''),
        )

    inv_food_punct, inv_food_typo = build_pair(food_ret)
    inv_drug_punct, inv_drug_typo = build_pair(drug_ret)

    # Perturbations listed in the original as not (yet) used here:
    # contract, expand_contractions, contractions, change_names,
    # change_location, change_number.

    tests = [inv_food_punct, inv_food_typo, inv_drug_punct, inv_drug_typo]
    names = [
        "inv_food_punct", "inv_food_typo", "inv_drug_punct", "inv_drug_typo"
    ]

    # Writing the raw files fails if the target directory is missing, so
    # create it first.
    os.makedirs('./tests/', exist_ok=True)
    for test, name in zip(tests, names):
        test.to_raw_file('./tests/' + name + '.txt')

    return tests, names
示例#3
0
class TestTestTypes(TestCase):
    """
    Checks that MFT, DIR and INV tests expose the `test_id` they were built
    with, and that `test_id` is None when it is not supplied.
    """

    editor = Editor()
    dummy_test_data = editor.template(
        templates=["example 1", "example 2"],
        meta=False,
        labels=["label1", "label2"],
    )
    test_id = "100"

    def _build(self, test_type, name, **extra):
        # Shared constructor call: dummy data, an equality expectation, the
        # given name, plus whatever extra keywords the case needs.
        return test_type(**self.dummy_test_data,
                         expect=Expect.eq(),
                         name=name,
                         **extra)

    def test_mft_w_test_id(self):
        built = self._build(MFT, "mft test with test id",
                            test_id=self.test_id)
        assert built.test_id == self.test_id

    def test_mft_wo_test_id(self):
        built = self._build(MFT, "mft test without test id")
        assert built.test_id is None

    def test_dir_w_test_id(self):
        built = self._build(DIR, "dir test with test id",
                            test_id=self.test_id)
        assert built.test_id == self.test_id

    def test_dir_wo_test_id(self):
        built = self._build(DIR, "dir test without test id")
        assert built.test_id is None

    def test_inv_w_test_id(self):
        built = self._build(INV, "inv test with test id",
                            test_id=self.test_id)
        assert built.test_id == self.test_id

    def test_inv_wo_test_id(self):
        built = self._build(INV, "inv test without test id")
        assert built.test_id is None
def get_template(config):
    """
    Build the premise/hypothesis templates selected by `config`.

    `config["entailment"]` and `config["overlap"]` are read as flags:

    * both truthy -> two templates with `labels=1`;
    * only `overlap` truthy -> one template with `labels=0`;
    * anything else -> one template with `labels=0`.

    Every template shares the same premise and draws its adjectives from
    `ADJ`. Returns the list of objects produced by `Editor.template`.
    """
    editor = Editor()
    entailment = config["entailment"]
    overlap = config["overlap"]

    premise = '{first_name} is {adj1} but not {adj2}.'

    # Each branch only decides the label and which hypotheses go with the
    # shared premise.
    if entailment and overlap:
        label = 1
        hypotheses = [
            '{first_name} is {adj1}.',
            '{first_name} is not {adj2}.',
        ]
    elif overlap:
        label = 0
        hypotheses = ['{first_name} is {adj2}.']
    else:
        label = 0
        hypotheses = ['{first_name} is not {adj1}.']

    return [
        editor.template({'premise': premise, 'hypothesis': hypothesis},
                        labels=label,
                        adj=ADJ,
                        remove_duplicates=True)
        for hypothesis in hypotheses
    ]
示例#5
0
def generate_sents(template, words):
    """
    Fill `template` with `words` (passed as the `word` fill-in) using a
    fresh `Editor` and return the `data` attribute of the result.
    """
    return Editor().template(template, word=words).data
示例#6
0
def generate_words(suggest_sentence):
    """
    Return whatever a fresh `Editor` suggests for `suggest_sentence`.
    """
    return Editor().suggest(suggest_sentence)
示例#7
0
def make_suite():
    """
    Create a `TestSuite` and an `Editor`.

    NOTE(review): in the lines visible here neither object is used or
    returned, so the function returns None; the body looks truncated --
    confirm against the original source.
    """
    suite = TestSuite()
    editor = Editor()
#!/usr/bin/env python
# coding: utf-8

# # CheckList Playground
#
# Useful for coming up with a lot of words of some class (eg: transitive verb).

# In[1]:

import checklist
from checklist.editor import Editor

# In[2]:

# Module-level editor, reused by the suggestion call below.
editor = Editor()

# In[9]:

# Ask the editor for fill-ins for the {mask} slot. The result is not stored;
# presumably it was shown as the notebook cell's output.
editor.suggest('The {mask} showed him his car.')
示例#9
0
class SentimentAnalysisSuite(TaskSuite):
    """
    This suite was built using the checklist process with the self.editor
    suggestions. Users are encouraged to add/modify as they see fit.

    Note: `editor.suggest(...)` can be slow as it runs a language model.
    """
    def __init__(
        self,
        suite: Optional[TestSuite] = None,
        positive: Optional[int] = 0,
        negative: Optional[int] = 1,
        **kwargs,
    ):
        """
        Store the indices used for the positive and negative labels, then
        hand `suite` and any remaining keyword arguments to the parent
        initializer.
        """
        # The label indices are set before the parent initializer runs.
        self._positive, self._negative = positive, negative
        super().__init__(suite, **kwargs)

    @overrides
    def _prediction_and_confidence_scores(self, predictor):
        """
        Return a function that maps a batch of inputs to a pair of numpy
        arrays: the predicted label per input (index of the largest entry
        of `"probs"`) and, per input, the `[positive, negative]`
        probabilities.
        """
        def preds_and_confs_fn(data):
            # `Instance` batches go to the predictor unchanged; anything else
            # is wrapped as {"sentence": ...} JSON inputs.
            if isinstance(data[0], Instance):
                outputs = predictor.predict_batch_instance(data)
            else:
                outputs = predictor.predict_batch_json(
                    [{"sentence": sentence} for sentence in data])

            predicted, scores = [], []
            for output in outputs:
                probs = output["probs"]
                predicted.append(probs.index(max(probs)))
                scores.append([probs[self._positive], probs[self._negative]])
            return np.array(predicted), np.array(scores)

        return preds_and_confs_fn

    @overrides
    def _format_failing_examples(
        self,
        inputs: Tuple,
        pred: int,
        conf: Union[np.array, np.ndarray],
        label: Optional[int] = None,
        *args,
        **kwargs,
    ):
        """
        Formatting function for printing failed test examples.
        """
        labels = {self._positive: "Positive", self._negative: "Negative"}
        ret = str(inputs)
        if label is not None:
            ret += " (Original: %s)" % labels[label]
        ret += "\nPrediction: %s (Confidence: %.1f)" % (labels[pred],
                                                        conf[pred])

        return ret

    @overrides
    def _default_tests(self,
                       data: Optional[Iterable[str]],
                       num_test_cases=100):
        """
        Add the parent's default tests, set up the editor, and then add the
        vocabulary, NER, temporal, fairness and negation tests, in that
        order.

        NOTE(review): `_default_robustness_tests` is not invoked here --
        confirm that this is intentional.
        """
        super()._default_tests(data, num_test_cases)
        self._setup_editor()

        builders = (
            self._default_vocabulary_tests,
            self._default_ner_tests,
            self._default_temporal_tests,
            self._default_fairness_tests,
            self._default_negation_tests,
        )
        for build in builders:
            build(data, num_test_cases)

    def _setup_editor(self):
        if not hasattr(self, "editor"):
            self.editor = Editor()

            pos_adj = [
                "good",
                "great",
                "excellent",
                "amazing",
                "extraordinary",
                "beautiful",
                "fantastic",
                "nice",
                "incredible",
                "exceptional",
                "awesome",
                "perfect",
                "fun",
                "adorable",
                "brilliant",
                "exciting",
                "sweet",
                "wonderful",
            ]
            neg_adj = [
                "awful",
                "bad",
                "horrible",
                "weird",
                "rough",
                "lousy",
                "unhappy",
                "average",
                "difficult",
                "poor",
                "sad",
                "frustrating",
                "hard",
                "lame",
                "nasty",
                "annoying",
                "boring",
                "creepy",
                "dreadful",
                "ridiculous",
                "terrible",
                "ugly",
                "unpleasant",
            ]
            self.editor.add_lexicon("pos_adj", pos_adj, overwrite=True)
            self.editor.add_lexicon("neg_adj", neg_adj, overwrite=True)

            pos_verb_present = [
                "like",
                "enjoy",
                "appreciate",
                "love",
                "recommend",
                "admire",
                "value",
                "welcome",
            ]
            neg_verb_present = [
                "hate", "dislike", "regret", "abhor", "dread", "despise"
            ]
            pos_verb_past = [
                "liked",
                "enjoyed",
                "appreciated",
                "loved",
                "admired",
                "valued",
                "welcomed",
            ]
            neg_verb_past = [
                "hated", "disliked", "regretted", "abhorred", "dreaded",
                "despised"
            ]
            self.editor.add_lexicon("pos_verb_present",
                                    pos_verb_present,
                                    overwrite=True)
            self.editor.add_lexicon("neg_verb_present",
                                    neg_verb_present,
                                    overwrite=True)
            self.editor.add_lexicon("pos_verb_past",
                                    pos_verb_past,
                                    overwrite=True)
            self.editor.add_lexicon("neg_verb_past",
                                    neg_verb_past,
                                    overwrite=True)
            self.editor.add_lexicon("pos_verb",
                                    pos_verb_present + pos_verb_past,
                                    overwrite=True)
            self.editor.add_lexicon("neg_verb",
                                    neg_verb_present + neg_verb_past,
                                    overwrite=True)

            noun = [
                "airline",
                "movie",
                "product",
                "customer service",
                "restaurant",
                "hotel",
                "food",
                "staff",
                "company",
                "crew",
                "service",
            ]
            self.editor.add_lexicon("noun", noun, overwrite=True)

            intens_adj = [
                "very",
                "really",
                "absolutely",
                "truly",
                "extremely",
                "quite",
                "incredibly",
                "amazingly",
                "especially",
                "exceptionally",
                "unbelievably",
                "utterly",
                "exceedingly",
                "rather",
                "totally",
                "particularly",
            ]
            intens_verb = [
                "really",
                "absolutely",
                "truly",
                "extremely",
                "especially",
                "utterly",
                "totally",
                "particularly",
                "highly",
                "definitely",
                "certainly",
                "genuinely",
                "honestly",
                "strongly",
                "sure",
                "sincerely",
            ]

            self.editor.add_lexicon("intens_adj", intens_adj, overwrite=True)
            self.editor.add_lexicon("intens_verb", intens_verb, overwrite=True)

            reducer_adj = [
                "somewhat",
                "kinda",
                "mostly",
                "probably",
                "generally",
                "reasonably",
                "a little",
                "a bit",
                "slightly",
            ]

            self.editor.add_lexicon("reducer_adj", reducer_adj, overwrite=True)

            self.monotonic_label = Expect.monotonic(increasing=True,
                                                    tolerance=0.1)
            self.monotonic_label_down = Expect.monotonic(increasing=False,
                                                         tolerance=0.1)

    def _default_vocabulary_tests(self,
                                  data: Optional[Iterable[str]],
                                  num_test_cases=100):
        """
        Add the "Vocabulary" capability tests to the suite.

        Always added: single positive / negative words and sentiment-laden
        words in context (MFT), plus intensifier and reducer sentence pairs
        (DIR). Only when `data` is truthy: two DIR tests that perturb
        sentences from `data` with a positive or a negative phrase.

        Reads the lexicons on `self.editor` and the `monotonic_label*`
        expectations, all of which `_setup_editor` installs, so that method
        must have run first.

        `num_test_cases` is passed as `nsamples` to the sampled templates
        and perturbations.
        """
        # --- Single words -------------------------------------------------
        positive_words = (self.editor.lexicons["pos_adj"] +
                          self.editor.lexicons["pos_verb_present"] +
                          self.editor.lexicons["pos_verb_past"])

        test = MFT(
            positive_words,
            labels=self._positive,
            name="Single Positive Words",
            capability="Vocabulary",
            description="Correctly recognizes positive words",
        )

        self.add_test(test)

        negative_words = (self.editor.lexicons["neg_adj"] +
                          self.editor.lexicons["neg_verb_present"] +
                          self.editor.lexicons["neg_verb_past"])

        test = MFT(
            negative_words,
            labels=self._negative,
            name="Single Negative Words",
            capability="Vocabulary",
            description="Correctly recognizes negative words",
        )

        self.add_test(test)

        # --- Sentiment words in short sentences ----------------------------
        template = self.editor.template(
            "{it} {noun} {be} {pos_adj}.",
            it=["The", "This", "That"],
            be=["is", "was"],
            labels=self._positive,
            save=True,
        )
        template += self.editor.template(
            "{it} {be} {a:pos_adj} {noun}.",
            it=["It", "This", "That"],
            be=["is", "was"],
            labels=self._positive,
            save=True,
        )
        template += self.editor.template(
            "{i} {pos_verb} {the} {noun}.",
            i=["I", "We"],
            the=["this", "that", "the"],
            labels=self._positive,
            save=True,
        )
        template += self.editor.template(
            "{it} {noun} {be} {neg_adj}.",
            it=["That", "This", "The"],
            be=["is", "was"],
            labels=self._negative,
            save=True,
        )
        template += self.editor.template(
            "{it} {be} {a:neg_adj} {noun}.",
            it=["It", "This", "That"],
            be=["is", "was"],
            labels=self._negative,
            save=True,
        )
        template += self.editor.template(
            "{i} {neg_verb} {the} {noun}.",
            i=["I", "We"],
            the=["this", "that", "the"],
            labels=self._negative,
            save=True,
        )

        test = MFT(
            **template,
            name="Sentiment-laden words in context",
            capability="Vocabulary",
            description="Use positive and negative verbs and adjectives "
            "with nouns such as product, movie, airline, etc. "
            'E.g. "This was a bad movie"',
        )

        self.add_test(test)

        # --- Intensifiers: (plain sentence, intensified sentence) pairs ----
        template = self.editor.template(
            [
                "{it} {be} {a:pos_adj} {noun}.",
                "{it} {be} {a:intens_adj} {pos_adj} {noun}."
            ],
            it=["It", "This", "That"],
            be=["is", "was"],
            nsamples=num_test_cases,
            save=True,
        )
        template += self.editor.template(
            [
                "{i} {pos_verb} {the} {noun}.",
                "{i} {intens_verb} {pos_verb} {the} {noun}."
            ],
            i=["I", "We"],
            the=["this", "that", "the"],
            nsamples=num_test_cases,
            save=True,
        )
        template += self.editor.template(
            [
                "{it} {be} {a:neg_adj} {noun}.",
                "{it} {be} {a:intens_adj} {neg_adj} {noun}."
            ],
            it=["It", "This", "That"],
            be=["is", "was"],
            nsamples=num_test_cases,
            save=True,
        )
        template += self.editor.template(
            [
                "{i} {neg_verb} {the} {noun}.",
                "{i} {intens_verb} {neg_verb} {the} {noun}."
            ],
            i=["I", "We"],
            the=["this", "that", "the"],
            nsamples=num_test_cases,
            save=True,
        )

        # Adjacent literals are concatenated with nothing in between, so each
        # piece must carry its own trailing space or newline.
        test = DIR(
            template.data,
            self.monotonic_label,
            templates=template.templates,
            name="Intensifiers",
            capability="Vocabulary",
            description=
            "Test is composed of pairs of sentences (x1, x2), where we add an intensifier "
            "such as 'really',or 'very' to x2 and expect the confidence to NOT go down "
            "(with tolerance=0.1). e.g.:\n"
            "x1 = 'That was a good movie'\n"
            "x2 = 'That was a very good movie'",
        )

        self.add_test(test)

        # --- Reducers: (plain sentence, softened sentence) pairs -----------
        template = self.editor.template(
            [
                "{it} {noun} {be} {pos_adj}.",
                "{it} {noun} {be} {reducer_adj} {pos_adj}."
            ],
            it=["The", "This", "That"],
            be=["is", "was"],
            nsamples=num_test_cases,
            save=True,
        )
        template += self.editor.template(
            [
                "{it} {noun} {be} {neg_adj}.",
                "{it} {noun} {be} {reducer_adj} {neg_adj}."
            ],
            it=["The", "This", "That"],
            be=["is", "was"],
            nsamples=num_test_cases,
            save=True,
        )
        test = DIR(
            template.data,
            self.monotonic_label_down,
            templates=template.templates,
            name="Reducers",
            capability="Vocabulary",
            description=
            "Test is composed of pairs of sentences (x1, x2), where we add a reducer "
            "such as 'somewhat', or 'kinda' to x2 and expect the confidence to NOT go up "
            "(with tolerance=0.1). e.g.:\n"
            "x1 = 'The staff was good.'\n"
            "x2 = 'The staff was somewhat good.'",
        )

        self.add_test(test)

        # --- Data-driven tests: only possible when sentences were supplied -
        if data:

            positive = self.editor.template("I {pos_verb_present} you.").data
            positive += self.editor.template("You are {pos_adj}.").data

            negative = self.editor.template("I {neg_verb_present} you.").data
            negative += self.editor.template("You are {neg_adj}.").data

            template = Perturb.perturb(data,
                                       _add_phrase_function(positive),
                                       nsamples=num_test_cases)
            test = DIR(
                template.data,
                Expect.pairwise(self._diff_up),
                name="Add positive phrases",
                capability="Vocabulary",
                description=
                "Add very positive phrases (e.g. I love you) to the end of sentences, "
                "expect probability of positive to NOT go down (tolerance=0.1)",
            )

            self.add_test(test)

            template = Perturb.perturb(data,
                                       _add_phrase_function(negative),
                                       nsamples=num_test_cases)
            test = DIR(
                template.data,
                Expect.pairwise(self._diff_down),
                name="Add negative phrases",
                capability="Vocabulary",
                description=
                "Add very negative phrases (e.g. I hate you) to the end of sentences, "
                "expect probability of positive to NOT go up (tolerance=0.1)",
            )

            self.add_test(test)

    def _default_robustness_tests(self,
                                  data: Optional[Iterable[str]],
                                  num_test_cases=100):

        template = Perturb.perturb(data,
                                   utils.add_random_strings,
                                   nsamples=num_test_cases)
        test = INV(
            template.data,
            name="Add random urls and handles",
            capability="Robustness",
            description=
            "Add randomly generated urls and handles to the start or end of sentence",
        )

        self.add_test(test)

    def _default_ner_tests(self,
                           data: Optional[Iterable[str]],
                           num_test_cases=100):
        if data:
            template = Perturb.perturb(data,
                                       utils.spacy_wrap(Perturb.change_names,
                                                        ner=True),
                                       nsamples=num_test_cases)
            test = INV(
                template.data,
                name="Change names",
                capability="NER",
                description="Replace names with other common names",
            )
            self.add_test(test)

            template = Perturb.perturb(data,
                                       utils.spacy_wrap(
                                           Perturb.change_location, ner=True),
                                       nsamples=num_test_cases)
            test = INV(
                template.data,
                name="Change locations",
                capability="NER",
                description=
                "Replace city or country names with other cities or countries",
            )
            self.add_test(test)

            template = Perturb.perturb(data,
                                       utils.spacy_wrap(Perturb.change_number,
                                                        ner=True),
                                       nsamples=num_test_cases)
            test = INV(
                template.data,
                name="Change numbers",
                capability="NER",
                description=
                "Replace integers with random integers within a 20% radius of the original",
            )
            self.add_test(test)

    def _default_temporal_tests(self,
                                data: Optional[Iterable[str]],
                                num_test_cases=100):
        """
        Add the "Temporal" capability tests to the suite.

        * "Used to, but now" (MFT): sentences that pair a past opinion with
          a conflicting present one, labelled with the present sentiment.
        * "'Used to' should reduce" (DIR): pairs of a plain statement and
          its "used to" variant, checked with `monotonic_label_down`.

        `data` is not used by this method. `num_test_cases` is passed as
        `nsamples` to every template call.
        """
        self._setup_editor()

        change = ["but", "even though", "although", ""]

        # Present sentiment is positive.
        template = self.editor.template(
            [
                "I used to think this {noun} was {neg_adj}, {change} now I think it is {pos_adj}.",
                "I think this {noun} is {pos_adj}, {change} I used to think it was {neg_adj}.",
                "In the past I thought this {noun} was {neg_adj}, {change} now I think it is {pos_adj}.",
                "I think this {noun} is {pos_adj}, {change} in the past I thought it was {neg_adj}.",
            ],
            change=change,
            unroll=True,
            nsamples=num_test_cases,
            save=True,
            labels=self._positive,
        )
        template += self.editor.template(
            [
                "I used to {neg_verb_present} this {noun}, {change} now I {pos_verb_present} it.",
                "I {pos_verb_present} this {noun}, {change} I used to {neg_verb_present} it.",
                # Present-tense verb after "now I", matching every sibling
                # template (the plain `pos_verb` lexicon also holds past
                # forms, which would give e.g. "now I loved it").
                "In the past I would {neg_verb_present} this {noun}, {change} now I {pos_verb_present} it.",
                "I {pos_verb_present} this {noun}, {change} in the past I would {neg_verb_present} it.",
            ],
            change=change,
            unroll=True,
            nsamples=num_test_cases,
            save=True,
            labels=self._positive,
        )

        # Present sentiment is negative.
        template += self.editor.template(
            [
                "I used to think this {noun} was {pos_adj}, {change} now I think it is {neg_adj}.",
                "I think this {noun} is {neg_adj}, {change} I used to think it was {pos_adj}.",
                "In the past I thought this {noun} was {pos_adj}, {change} now I think it is {neg_adj}.",
                "I think this {noun} is {neg_adj}, {change} in the past I thought it was {pos_adj}.",
            ],
            change=change,
            unroll=True,
            nsamples=num_test_cases,
            save=True,
            labels=self._negative,
        )
        template += self.editor.template(
            [
                "I used to {pos_verb_present} this {noun}, {change} now I {neg_verb_present} it.",
                "I {neg_verb_present} this {noun}, {change} I used to {pos_verb_present} it.",
                "In the past I would {pos_verb_present} this {noun}, {change} now I {neg_verb_present} it.",
                "I {neg_verb_present} this {noun}, {change} in the past I would {pos_verb_present} it.",
            ],
            change=change,
            unroll=True,
            nsamples=num_test_cases,
            save=True,
            labels=self._negative,
        )
        # Adjacent literals are concatenated with nothing in between, so each
        # piece must carry its own trailing space or newline.
        test = MFT(
            **template,
            name="Used to, but now",
            capability="Temporal",
            description="Have two conflicting statements, one about the past and "
            "one about the present. "
            "Expect the present to carry the sentiment. Examples:\n"
            "I used to love this airline, now I hate it -> should be negative\n"
            "I love this airline, although I used to hate it -> should be positive",
        )

        self.add_test(test)

        adjectives = self.editor.lexicons["pos_adj"] + self.editor.lexicons[
            "neg_adj"]
        verbs = self.editor.lexicons[
            "pos_verb_present"] + self.editor.lexicons["neg_verb_present"]

        # Pairs of (statement, "used to" statement).
        template = self.editor.template(
            [
                "{it} {be} {a:adj} {noun}.",
                "I used to think {it} {be} {a:adj} {noun}."
            ],
            it=["it", "this", "that"],
            be=["is", "was"],
            adj=adjectives,
            save=True,
            nsamples=num_test_cases,
        )
        template += self.editor.template(
            ["{i} {verb} {the} {noun}.", "{i} used to {verb} {the} {noun}."],
            i=["I", "We"],
            the=["this", "that", "the"],
            verb=verbs,
            save=True,
            nsamples=num_test_cases,
        )
        test = DIR(
            template.data,
            self.monotonic_label_down,
            templates=template.templates,
            name="'Used to' should reduce",
            capability="Temporal",
            description=
            "A model should not be more confident on 'I used to think X' "
            "when compared to 'X', e.g. 'I used to love this restaurant' "
            "should have less confidence than 'I love this restaurant'",
        )

        self.add_test(test)

    def _default_fairness_tests(self,
                                data: Optional[Iterable[str]],
                                num_test_cases=100):
        """
        Add one "Fairness" invariance test per protected category (race,
        sexuality, religion, nationality). For each category, sentences of
        the form "<name> is <protected phrase> {mask}." are generated for
        male and then female names and combined into a single INV test with
        threshold 0.1.

        `data` is not used by this method.
        """
        protected = {
            "race": ["a black", "a hispanic", "a white",
                     "an asian"],  # add more here.
            "sexuality": self.editor.template("{a:sexual_adj}").data,
            "religion": self.editor.template("{a:religion_adj}").data,
            "nationality": self.editor.template("{a:nationality}").data,
        }

        patterns = ("{male} is %s {mask}.", "{female} is %s {mask}.")

        for category, phrases in protected.items():
            combined = None
            for pattern in patterns:
                generated = self.editor.template(
                    [pattern % phrase for phrase in phrases],
                    return_maps=False,
                    nsamples=num_test_cases,
                    save=True,
                )
                if combined is None:
                    combined = generated
                else:
                    combined += generated

            self.add_test(
                INV(
                    combined.data,
                    threshold=0.1,
                    templates=combined.templates,
                    name="Protected: %s" % category,
                    capability="Fairness",
                    description=
                    "Prediction should be the same for various adjectives within a protected class",
                ))

    def _default_negation_tests(self,
                                data: Optional[Iterable[str]],
                                num_test_cases=100):
        """
        Add two "Negation" MFT tests, both labelled negative: simple
        negations of positive statements, and "I thought ... but it was
        not" sentences.

        `data` is not used by this method. `num_test_cases` is passed as
        `nsamples` to every template call.
        """
        def generate(specs):
            # Fill each (template text, fill-ins) pair in order and
            # accumulate the results with `+=`.
            combined = None
            for text, fillers in specs:
                filled = self.editor.template(text,
                                              **fillers,
                                              save=True,
                                              nsamples=num_test_cases)
                if combined is None:
                    combined = filled
                else:
                    combined += filled
            return combined

        neg = [
            "I can't say I", "I don't", "I would never say I",
            "I don't think I", "I didn't"
        ]

        simple = generate([
            ("{it} {noun} {nt} {pos_adj}.", {
                "it": ["This", "That", "The"],
                "nt": ["is not", "isn't"],
            }),
            ("{it} {benot} {a:pos_adj} {noun}.", {
                "it": ["It", "This", "That"],
                "benot": ["is not", "isn't", "was not", "wasn't"],
            }),
            ("{neg} {pos_verb_present} {the} {noun}.", {
                "neg": neg,
                "the": ["this", "that", "the"],
            }),
            # `neg` is still passed although this text has no {neg} slot.
            ("No one {pos_verb_present}s {the} {noun}.", {
                "neg": neg,
                "the": ["this", "that", "the"],
            }),
        ])
        self.add_test(
            MFT(
                simple.data,
                labels=self._negative,
                templates=simple.templates,
                name="Simple negations: negative",
                capability="Negation",
                description="Very simple negations of positive statements",
            ))

        thought = generate([
            # `nt` is still passed although this text has no {nt} slot.
            ("I thought {it} {noun} would be {pos_adj}, but it {neg}.", {
                "neg": ["was not", "wasn't"],
                "it": ["this", "that", "the"],
                "nt": ["is not", "isn't"],
            }),
            ("I thought I would {pos_verb_present} {the} {noun}, but I {neg}.",
             {
                 "neg": ["did not", "didn't"],
                 "the": ["this", "that", "the"],
             }),
        ])
        self.add_test(
            MFT(
                thought.data,
                labels=self._negative,
                templates=thought.templates,
                name="Simple negations: I thought x was positive, but it was not",
                capability="Negation",
                description="",
            ))

    def _positive_change(self, orig_conf: np.ndarray,
                         conf: np.ndarray) -> float:
        """
        Measures how far the scores moved towards the positive label between
        `orig_conf` and `conf`: the confidence lost by the negative label plus
        the confidence gained by the positive label.
        """
        negative_drop = orig_conf[self._negative] - conf[self._negative]
        return negative_drop + conf[self._positive] - orig_conf[self._positive]

    def _diff_up(
        self,
        orig_pred: int,
        pred: int,
        orig_conf: np.ndarray,
        conf: np.ndarray,
        labels: Optional[int] = None,
        meta: Optional[List] = None,
    ) -> Union[bool, float]:
        """
        Expectation that the positive change from `orig_conf` to `conf` does
        not go down by more than a fixed tolerance of 0.1.

        The signature is the one expected by the
        `checklist.expect.Expect.pairwise` function; only `orig_conf` and
        `conf` are actually used. `orig_conf` is the confidence score of the
        first example in a test's input data pair.

        Returns `True` when the expectation holds. Otherwise returns a
        (negative) `float` whose size is the magnitude of the failure.
        """
        slack = 0.1
        margin = self._positive_change(orig_conf, conf) + slack
        return True if margin >= 0 else margin

    def _diff_down(
        self,
        orig_pred: int,
        pred: int,
        orig_conf: np.ndarray,
        conf: np.ndarray,
        labels: Optional[int] = None,
        meta: Optional[List] = None,
    ) -> Union[bool, float]:
        """
        Expectation that the positive change from `orig_conf` to `conf` does
        not go up by more than a fixed tolerance of 0.1.

        The signature is the one expected by the
        `checklist.expect.Expect.pairwise` function; only `orig_conf` and
        `conf` are actually used. `orig_conf` is the confidence score of the
        first example in a test's input data pair.

        Returns `True` when the expectation holds. Otherwise returns a
        (negative) `float` whose size is the magnitude of the failure.
        """
        slack = 0.1
        overshoot = self._positive_change(orig_conf, conf) - slack
        return True if overshoot <= 0 else -overshoot
示例#10
0
    def _setup_editor(self):
        """
        Creates `self.editor` the first time it is called and registers the
        sentiment lexicons (adjectives, verbs, nouns, intensifiers, reducers)
        on it, together with the two monotonic expectations. Does nothing if
        `self.editor` already exists.
        """
        if hasattr(self, "editor"):
            return

        self.editor = Editor()

        positive_present = [
            "like", "enjoy", "appreciate", "love", "recommend", "admire",
            "value", "welcome",
        ]
        negative_present = [
            "hate", "dislike", "regret", "abhor", "dread", "despise",
        ]
        positive_past = [
            "liked", "enjoyed", "appreciated", "loved", "admired", "valued",
            "welcomed",
        ]
        negative_past = [
            "hated", "disliked", "regretted", "abhorred", "dreaded",
            "despised",
        ]

        # Insertion order of this table is the order the lexicons get registered in.
        lexicons = {
            "pos_adj": [
                "good", "great", "excellent", "amazing", "extraordinary",
                "beautiful", "fantastic", "nice", "incredible", "exceptional",
                "awesome", "perfect", "fun", "adorable", "brilliant",
                "exciting", "sweet", "wonderful",
            ],
            "neg_adj": [
                "awful", "bad", "horrible", "weird", "rough", "lousy",
                "unhappy", "average", "difficult", "poor", "sad",
                "frustrating", "hard", "lame", "nasty", "annoying", "boring",
                "creepy", "dreadful", "ridiculous", "terrible", "ugly",
                "unpleasant",
            ],
            "pos_verb_present": positive_present,
            "neg_verb_present": negative_present,
            "pos_verb_past": positive_past,
            "neg_verb_past": negative_past,
            # Tense-agnostic verb lexicons: present forms followed by past forms.
            "pos_verb": positive_present + positive_past,
            "neg_verb": negative_present + negative_past,
            "noun": [
                "airline", "movie", "product", "customer service",
                "restaurant", "hotel", "food", "staff", "company", "crew",
                "service",
            ],
            "intens_adj": [
                "very", "really", "absolutely", "truly", "extremely", "quite",
                "incredibly", "amazingly", "especially", "exceptionally",
                "unbelievably", "utterly", "exceedingly", "rather", "totally",
                "particularly",
            ],
            "intens_verb": [
                "really", "absolutely", "truly", "extremely", "especially",
                "utterly", "totally", "particularly", "highly", "definitely",
                "certainly", "genuinely", "honestly", "strongly", "sure",
                "sincerely",
            ],
            "reducer_adj": [
                "somewhat", "kinda", "mostly", "probably", "generally",
                "reasonably", "a little", "a bit", "slightly",
            ],
        }
        for tag, words in lexicons.items():
            self.editor.add_lexicon(tag, words, overwrite=True)

        self.monotonic_label = Expect.monotonic(increasing=True, tolerance=0.1)
        self.monotonic_label_down = Expect.monotonic(increasing=False,
                                                     tolerance=0.1)
示例#11
0
def add_common_lexicons(editor: Editor):
    """
    Add commonly used lexicons to the editor object. These can be used in all
    the task suites.

    Currently registers a single lexicon, `profession`, replacing any lexicon
    already stored under that name (`overwrite=True`). Every profession is
    listed exactly once so that no entry is over-represented when templates
    are expanded or sampled.

    Note: Updates the `editor` object in place.
    """
    profession = [
        "journalist",
        "historian",
        "secretary",
        "nurse",
        "waitress",
        "accountant",
        "engineer",
        "attorney",
        "artist",
        "editor",
        "architect",
        "model",
        "interpreter",
        "analyst",
        "actor",
        "actress",
        "assistant",
        "intern",
        "economist",
        "organizer",
        "author",
        "investigator",
        "agent",
        "administrator",
        "executive",
        "educator",
        "investor",
        "DJ",
        "entrepreneur",
        "auditor",
        "advisor",
        "instructor",
        "activist",
        "consultant",
        "apprentice",
        "reporter",
        "expert",
        "psychologist",
        "examiner",
        "painter",
        "manager",
        "contractor",
        "therapist",
        "programmer",
        "musician",
        "producer",
        "associate",
        "intermediary",
        "designer",
        "cook",
        "salesperson",
        "dentist",
        "detective",
        "banker",
        "researcher",
        "cop",
        "driver",
        "counselor",
        "clerk",
        "professor",
        "tutor",
        "coach",
        "chemist",
        "scientist",
        "veterinarian",
        "firefighter",
        "baker",
        "psychiatrist",
        "prosecutor",
        "director",
        "technician",
    ]

    editor.add_lexicon("profession", profession, overwrite=True)
示例#12
0

def replace_john_with_others(x, *args, **kwargs):
    """
    Perturbation function: swap the whole word "John" for other names.

    Returns None when `x` does not contain "John" as a whole word; otherwise
    returns two strings, one with every "John" replaced by "Luke" and one
    with every "John" replaced by "Mark". Extra positional and keyword
    arguments are accepted and ignored.
    """
    john = r'\bJohn\b'
    if re.search(john, x) is None:
        return None
    variants = []
    for substitute in ('Luke', 'Mark'):
        variants.append(re.sub(john, substitute, x))
    return variants


# Toy input: two sentences contain "John", one does not.
dataset = ['John is a man', 'Mary is a woman', 'John is an apostle']
# Run the custom perturbation over every sentence. For the sentence without
# "John" the function returns None (presumably that example is then left out
# of the result -- confirm against the Perturb.perturb documentation).
ret = Perturb.perturb(dataset, replace_john_with_others)
# Bare expression: shows the perturbed data when run as a notebook cell.
ret.data

# In[12]:

import checklist
from checklist.editor import Editor
from checklist.perturb import Perturb
from checklist.test_types import MFT, INV, DIR
# Editor used to expand the template below.
editor = Editor()

# Expand one template over the four adjectives. The `a:` prefix and the
# `{mask}` slot are resolved by the editor (presumably article selection and
# a masked-language-model fill-in -- confirm against the CheckList docs).
t = editor.template('This is {a:adj} {mask}.',
                    adj=['good', 'great', 'excellent', 'awesome'])
# Wrap the generated sentences in an MFT in which every example has label 1.
test1 = MFT(t.data,
            labels=1,
            name='Simple positives',
            capability='Vocabulary',
            description='')

# In[ ]:
示例#13
0
#!/usr/bin/env python
# coding: utf-8

# In[1]:


import checklist
from checklist.editor import Editor
from checklist.perturb import Perturb


# In[4]:


# VOCAB+POS -- MFT: fill the {adj} slot of one fixed Chinese sentence with each
# entry of the list below.
editor = Editor(language='chinese')
# Template with a single {adj} placeholder.
con = '店家服务是{adj}的'
ret = editor.template(con,adj=['非常好', '不错', '垃圾的', '无与伦比的', '差劲', '优美的', '鸟语花香', '百花齐放', '繁花似锦', '桃红柳绿', '春色满园',
                               '春意盎然','喜上眉梢','兴高采烈','眉飞色舞','喜笑颜开','欣喜若狂','心花怒放','相当好',])
# Bare expression: shows the generated sentences when run as a notebook cell.
ret.data


# In[11]:


# VOCAB+POS -- INV: same pattern as an MFT cell, but the {adj} slot is filled
# with the entries of the list below (the keyword is still called `adj`).
# A new editor is created, replacing the previous `editor` binding.
editor = Editor(language='chinese')
# Template with a single {adj} placeholder.
con = '这次又买了一个{adj}'
ret = editor.template(con,adj=['小米10', '小天鹅', '华为mate40', '苹果12', '锤子', '魅族', '三星', '奔驰', '宝马', 
                               '劳力士', '酷派','菲仕乐','WMF','康宁','苏泊尔','健力宝','可口可乐','滴滴','回力','LV'])
# Bare expression: shows the generated sentences when run as a notebook cell.
ret.data
示例#14
0
#!/usr/bin/env python
# coding: utf-8

# In[1]:

import checklist
from checklist.editor import Editor
from checklist.perturb import Perturb

# In[2]:

# Editor configured for Chinese; reused by every cell below.
editor = Editor(language='chinese')

# In[6]:

# Fill the {adj} slot with each of the six explicitly supplied values.
ret = editor.template('这是一个{adj}电影',
                      adj=['好看的', '不好看的', '垃圾的', '无与伦比的', '新奇的', '优美的'])
# Bare expression: shows the generated sentences when run as a notebook cell.
ret.data

# In[15]:

# No values are passed for {male}, so the editor has to supply them itself
# (presumably from a built-in lexicon -- confirm against the CheckList docs).
ret = editor.template('{male}是小米人吗', remove_duplicates=True)
# Show only the first ten generated sentences.
ret.data[0:10]

# In[17]:

# Register a custom `adj` lexicon on the editor for use in later templates.
editor.add_lexicon('adj', ['good', 'bad', 'great', 'terrible'])

# In[18]:

ret = editor.template('{adj} is not the same as {adj2}',
示例#15
0
def object_test():
    """
    Build the object-recognition MFT tests (food, sport, non-drug, drug) and
    write each of them to ``./tests/<name>.txt``.

    The integer labels follow
    codeDictionary = {"D":0, "M":1, "S":2, "H":3, "F":4, "O":5, "E":6, "NA":7}

    Side effects:
        * Rebinds the module-level ``food_ret``, ``sport_ret``, ``drug_ret``
          and ``nondrug_ret`` to the generated template results.
        * Creates the ``./tests`` directory if it does not exist yet and
          writes one raw file per returned test into it.

    Returns:
        A ``(tests, names)`` pair of parallel lists: the MFT objects and the
        names used for their output files.
    """
    global food_ret, sport_ret, drug_ret, nondrug_ret

    import os

    editor = Editor()

    # --- food (label 0) ---
    food_ret1 = editor.template('How often do you get {food}?',
                                food=food,
                                labels=0,
                                save=True)
    food_ret2 = editor.template('I can\'t stop thinking about {food}!',
                                food=food,
                                labels=0,
                                save=True)
    food_ret = food_ret1 + food_ret2
    mft_food = MFT(food_ret.data,
                   labels=food_ret.labels,
                   name='Object Rec: Food',
                   capability='Objects',
                   description='Food')

    # --- sport (label 6) ---
    sport_ret1 = editor.template('I have to participate in {sport}?',
                                 sport=sport,
                                 labels=6,
                                 save=True)
    sport_ret2 = editor.template(
        'It is good to move your body, like doing {sport}.',
        sport=sport,
        labels=6,
        save=True)
    sport_ret = sport_ret1 + sport_ret2
    mft_sport = MFT(sport_ret.data,
                    labels=sport_ret.labels,
                    name='Object Rec: Sport',
                    capability='Objects',
                    description='Sport')

    # --- non-drug (label 5); these two templates are not saved ---
    nondrug_ret1 = editor.template('How often do you take {nondrug}?',
                                   nondrug=nondrug,
                                   labels=5)
    nondrug_ret2 = editor.template(
        'Have you taken {nondrug} for the last five months?',
        nondrug=nondrug,
        labels=5)
    nondrug_ret = nondrug_ret1 + nondrug_ret2
    mft_nondrug = MFT(nondrug_ret.data,
                      labels=nondrug_ret.labels,
                      name='Object Rec: Non Drug',
                      capability='Objects',
                      description='Non Drug')

    # --- drug (label 1) ---
    drug_ret1 = editor.template('How often do you get {drug}?',
                                drug=drug,
                                labels=1,
                                save=True)
    drug_ret2 = editor.template(
        'Have you taken {drug} for the last five months?',
        drug=drug,
        labels=1,
        save=True)
    drug_ret = drug_ret1 + drug_ret2
    mft_drug = MFT(drug_ret.data,
                   labels=drug_ret.labels,
                   name='Object Rec: Drug',
                   capability='Objects',
                   description='Drug')

    # The INV / DIR tests below are still constructed but are currently NOT
    # part of the returned test list; add them to `named_tests` to enable them.
    nt = Perturb.perturb(nondrug_ret.data, swap_nondrug)
    inv_n = INV(**nt,
                name='swap nondrug name in both questions',
                capability='objects',
                description='')

    # Quick smoke run of an INV test with a constant 0.5 predictor:
    #     import numpy as np
    #     def pp(inputs):
    #         p1 = np.array([0.5 for x in inputs]).reshape(-1, 1)
    #         p0 = 1- p1
    #         return np.hstack((p0, p1))
    #     from checklist.pred_wrapper import PredictorWrapper
    #     wrapped = PredictorWrapper.wrap_softmax(pp)
    #     inv_n.run(wrapped)

    dt = Perturb.perturb(drug_ret.data, swap_drug)
    inv_d = INV(**dt,
                name='swap drug name in both questions',
                capability='objects',
                description='')

    nondrug_monodec = Expect.monotonic(label=5,
                                       increasing=False,
                                       tolerance=0.1)
    drug_monodec = Expect.monotonic(label=1, increasing=False, tolerance=0.1)

    ndt = Perturb.perturb(nondrug_ret.data, swap_nd)
    dir_nd = DIR(**ndt, expect=nondrug_monodec)

    dnt = Perturb.perturb(drug_ret.data, swap_dn)
    dir_dn = DIR(**dnt, expect=drug_monodec)

    # Keep each test next to its file name so the two lists cannot drift apart.
    named_tests = [
        ("mft_food", mft_food),
        ("mft_sport", mft_sport),
        ("mft_nondrug", mft_nondrug),
        ("mft_drug", mft_drug),
    ]
    names = [name for name, _ in named_tests]
    tests = [test for _, test in named_tests]

    # Writing would fail if the output directory were missing.
    os.makedirs('./tests', exist_ok=True)
    for name, test in named_tests:
        test.to_raw_file('./tests/' + name + '.txt')

    return tests, names
示例#16
0
def main():
    """
    Build a synthetic sentiment dataset and write it, one JSON object per
    line, to ``../data/music_reviews_yucky.json``.

    The output is made of seven groups of 100 examples each, in this order:
      1. typo-perturbed real reviews sampled from
         ``../data/music_reviews_dev.json``, each keeping the sentiment label
         of the review it was derived from;
      2-5. template sentences mixing a positive and a negative adjective,
         labeled by the adjective that closes the sentence;
      6-7. "friend showed me this song" template sentences ending in a
         positive / negative adjective.

    Example ids are assigned sequentially starting at 0.

    Raises:
        ValueError: if any source holds fewer than 100 examples, or if the
            typo perturbation does not return exactly one entry per review
            (labels could then no longer be matched to their texts).
    """
    n_samples = 100

    editor = Editor()
    adjpos = """ great fun lovely sexy wonderful amazing awesome good fantastic excelent masterful fabulous incredible salacious"""
    adjpos = adjpos.strip().split()
    adjneg = """terrible horrible shitty worst disasterous bad abominable atrocious crappy repulsive yucky irredeemable"""
    adjneg = adjneg.strip().split()

    rev_neg_pos = "some parts are {pos} but overall it is {neg}."
    rev_pos_neg = "some parts are {neg} but overall it is {pos}."

    final_data = []

    def add_examples(texts, label):
        # The next id is always the number of examples collected so far.
        for text in texts:
            final_data.append(to_json(text, label, len(final_data)))

    out_0 = editor.template(rev_neg_pos, pos=adjpos, neg=adjneg)
    out_1 = editor.template(rev_pos_neg, pos=adjpos, neg=adjneg)

    seed = 'I had a {neg} day really {neg} but the album made it {pos}'
    ous_0 = editor.template(seed, pos=adjpos, neg=adjneg)

    seed = 'I had a {pos} day really {pos} but the album made it {neg}'
    ous_1 = editor.template(seed, pos=adjpos, neg=adjneg)

    # One JSON document per line; blank lines are ignored.
    with open('../data/music_reviews_dev.json', 'r', encoding='utf-8') as f:
        data = [json.loads(line) for line in f if line.strip()]
    data = random.sample(data, n_samples)

    tmp = [d.get('reviewText') for d in data]
    tmp_labels = [d.get('sentiment') for d in data]

    baseline = "{male} is my {religion_adj}, {sexual_adj} friend, and he showed me this song. I found it {pos}"
    ret_pos = editor.template(baseline, pos=adjpos)
    ret_neg = editor.template(baseline, pos=adjneg)

    typo_data = Perturb.perturb(tmp, Perturb.add_typos,
                                keep_original=False).data
    if len(typo_data) != len(tmp):
        raise ValueError(
            "typo perturbation returned %d entries for %d reviews; "
            "labels cannot be aligned" % (len(typo_data), len(tmp)))
    # Pair every perturbed review with its own label BEFORE shuffling, so the
    # sampled order cannot detach a text from its sentiment.
    labelled_typos = list(zip(typo_data, tmp_labels))
    for entry, label in random.sample(labelled_typos, n_samples):
        add_examples([entry[0]], label)

    add_examples(random.sample(out_0.data, n_samples), "negative")
    add_examples(random.sample(out_1.data, n_samples), "positive")
    add_examples(random.sample(ous_0.data, n_samples), "positive")
    add_examples(random.sample(ous_1.data, n_samples), "negative")
    add_examples(random.sample(ret_pos.data, n_samples), "positive")
    add_examples(random.sample(ret_neg.data, n_samples), "negative")

    with open('../data/music_reviews_yucky.json', 'w',
              encoding='utf-8') as file:
        for line in final_data:
            file.write(json.dumps(line))
            file.write("\n")
"""
Script to generate pairs of sentences in direct / indirect object alternation, using
CheckList and RoBERTa.
"""
import checklist
from checklist.editor import Editor
import pickle

# Verbs plugged into both sentence frames below.
VERBS = ['gave', 'sent', 'mailed', 'brought', 'showed', 'sold']

editor = Editor()

# Unique (sentence 1, sentence 2) pairs collected across all verbs.
sentences = set()
for vb in VERBS:
    # Two frames are given to the model, but only the first three suggested
    # words are used. Including the second frame seems to steer it towards
    # words that are equally likely in both syntactic positions.
    prompt = f'The {{mask}} {vb} the {{mask}} a {{mask}}. The {{mask}} {vb} a {{mask}} to the {{mask}}.'
    ret = editor.suggest(prompt)

    for t in ret:
        subj, iobj, dobj = t[0], t[1], t[2]
        # Skip suggestions that reuse the same word in two roles.
        if len({subj, iobj, dobj}) < 3:
            continue
        sent1 = f'The {subj} {vb} the {iobj} a {dobj}.'
        sent2 = f'The {subj} {vb} a {dobj} to the {iobj}.'
        sentences.add((sent1, sent2))

for sent1, sent2 in sentences: