Exemplo n.º 1
0
    def test_with_token_characters_indexer(self):
        """Hotflip should still work when a TokenCharactersIndexer is paired with an EmptyEmbedder."""
        inputs = {
            "premise": "I always write unit tests for my code.",
            "hypothesis": "One time I didn't write any unit tests for my code.",
        }

        model_path = (
            self.FIXTURES_ROOT / "decomposable_attention" / "serialization" / "model.tar.gz"
        )
        predictor = Predictor.from_archive(load_archive(model_path), "textual-entailment")
        # Add a character-level indexer whose embedder contributes nothing to the encoding.
        char_indexer = TokenCharactersIndexer(min_padding_length=1)
        predictor._dataset_reader._token_indexers["chars"] = char_indexer
        predictor._model._text_field_embedder._token_embedders["chars"] = EmptyEmbedder()

        attacker = Hotflip(predictor)
        attacker.initialize()
        result = attacker.attack_from_json(inputs, "hypothesis", "grad_input_1")

        assert result is not None
        for key in ("final", "original", "outputs"):
            assert key in result
        # Hotflip substitutes tokens in place, so the token count never changes.
        assert len(result["final"][0]) == len(result["original"])
Exemplo n.º 2
0
    def test_with_token_characters_indexer(self):
        """Hotflip with a character indexer plus an EmptyEmbedder on the basic classifier fixture."""
        inputs = {"sentence": "I always write unit tests for my code."}

        model_path = (
            self.FIXTURES_ROOT / "basic_classifier" / "serialization" / "model.tar.gz"
        )
        predictor = Predictor.from_archive(load_archive(model_path))
        # Add a character-level indexer whose embedder contributes nothing to the encoding.
        char_indexer = TokenCharactersIndexer(min_padding_length=1)
        predictor._dataset_reader._token_indexers["chars"] = char_indexer
        predictor._model._text_field_embedder._token_embedders["chars"] = EmptyEmbedder()

        attacker = Hotflip(predictor)
        attacker.initialize()
        result = attacker.attack_from_json(inputs, "tokens", "grad_input_1")

        assert result is not None
        for key in ("final", "original", "outputs"):
            assert key in result
        # Hotflip substitutes tokens in place, so the token count never changes.
        assert len(result["final"][0]) == len(result["original"])

        # This checks for a bug that arose with a change in the pytorch API.  We want to be sure we
        # can handle the case where we have to re-encode a vocab item because we didn't save it in
        # our fake embedding matrix (see Hotflip docstring for more info).
        capped_attacker = Hotflip(predictor, max_tokens=50)
        capped_attacker.initialize()
        # token_idx 60 lies beyond max_tokens=50, forcing the re-encode path.
        capped_attacker._first_order_taylor(
            grad=torch.rand((10,)).numpy(), token_idx=torch.tensor(60), sign=1
        )
    def test_using_squad_model(self):
        """End-to-end Hotflip attack against a BiDAF reading-comprehension model.

        Beyond the standard attack-output invariants, this verifies that when a
        non-ignored token survives the attack unchanged, the predicted answer
        span must have moved (i.e. the attack succeeded before flipping
        everything).
        """
        inputs = {
            "question":
            "OMG, I heard you coded a test that succeeded on its first attempt, is that true?",
            "passage":
            "Bro, never doubt a coding wizard! I am the king of software, MWAHAHAHA",
        }

        archive = load_archive(FIXTURES_ROOT / "rc" / "bidaf" /
                               "serialization" / "model.tar.gz")
        predictor = Predictor.from_archive(archive, "reading-comprehension")

        hotflipper = Hotflip(predictor)
        hotflipper.initialize()
        # Attack the question; "grad_input_2" selects the gradient w.r.t. the question input.
        attack = hotflipper.attack_from_json(inputs, "question",
                                             "grad_input_2")
        print(attack)
        assert attack is not None
        assert "final" in attack
        assert "original" in attack
        assert "outputs" in attack
        assert len(attack["final"][0]) == len(
            attack["original"])  # hotflip replaces words without removing

        instance = predictor._json_to_instance(inputs)
        assert instance["question"] != attack["final"][
            0]  # check that the input has changed.

        # Recompute the model's original prediction to recover the pre-attack answer span.
        outputs = predictor._model.forward_on_instance(instance)
        original_labeled_instance = predictor.predictions_to_labeled_instances(
            instance, outputs)[0]
        original_span_start = original_labeled_instance[
            "span_start"].sequence_index
        original_span_end = original_labeled_instance[
            "span_end"].sequence_index

        # Post-attack answer span, as reported in the attack outputs.
        flipped_span_start = attack["outputs"][0]["best_span"][0]
        flipped_span_end = attack["outputs"][0]["best_span"][1]

        for i, token in enumerate(instance["question"]):
            token = str(token)
            if token in DEFAULT_IGNORE_TOKENS:
                assert token in attack["final"][
                    0]  # ignore tokens should not be changed
            # HotFlip keeps changing tokens until either the prediction changes or all tokens have
            # been changed. If there are tokens in the HotFlip final result that were in the
            # original (i.e., not all tokens were flipped), then the prediction should be
            # different.
            else:
                # Positional comparison: the token at index i survived unchanged.
                if token == attack["final"][0][i]:
                    assert (original_span_start != flipped_span_start
                            or original_span_end != flipped_span_end)
Exemplo n.º 4
0
    def test_targeted_attack_from_json(self):
        """A targeted Hotflip attack on a masked LM yields changed, same-length tokens."""
        inputs = {"sentence": "The doctor ran to the emergency room to see [MASK] patient."}

        model_path = self.FIXTURES_ROOT / 'masked_language_model' / 'serialization' / 'model.tar.gz'
        predictor = Predictor.from_archive(load_archive(model_path), 'masked_language_model')

        attacker = Hotflip(predictor, vocab_namespace='tokens')
        attacker.initialize()
        # Target mode: steer the masked prediction toward the word "hi".
        result = attacker.attack_from_json(inputs, target={'words': ['hi']})

        assert result is not None
        for key in ('final', 'original', 'outputs'):
            assert key in result
        # Replacement-only attack: length preserved, but the tokens must differ.
        assert len(result['final'][0]) == len(result['original'])
        assert result['final'][0] != result['original']
Exemplo n.º 5
0
    def test_hotflip(self):
        """Basic Hotflip attack on the fixture classifier returns well-formed output."""
        inputs = {"sentence": "I always write unit tests for my code."}

        archive_path = (self.FIXTURES_ROOT / "basic_classifier" /
                        "serialization" / "model.tar.gz")
        predictor = Predictor.from_archive(load_archive(archive_path))

        attacker = Hotflip(predictor)
        attacker.initialize()
        result = attacker.attack_from_json(inputs, "tokens", "grad_input_1")

        assert result is not None
        for key in ("final", "original", "outputs"):
            assert key in result
        # Hotflip swaps tokens in place; it never shortens the input.
        assert len(result["final"][0]) == len(result["original"])
Exemplo n.º 6
0
    def test_targeted_attack_from_json(self):
        """Targeted Hotflip on a masked LM: same token count, different tokens."""
        inputs = {"sentence": "The doctor ran to the emergency room to see [MASK] patient."}

        archive = load_archive(
            self.FIXTURES_ROOT / "masked_language_model" / "serialization" / "model.tar.gz"
        )
        predictor = Predictor.from_archive(archive, "masked_language_model")

        attacker = Hotflip(predictor, vocab_namespace="tokens")
        attacker.initialize()
        # Targeted mode: push the masked-token prediction toward "hi".
        result = attacker.attack_from_json(inputs, target={"words": ["hi"]})

        assert result is not None
        assert "final" in result and "original" in result and "outputs" in result
        # Hotflip only substitutes tokens, so lengths match while contents change.
        assert len(result["final"][0]) == len(result["original"])
        assert result["final"][0] != result["original"]
Exemplo n.º 7
0
    def test_interpret_works_with_custom_embedding_layer(self):
        """Hotflip should work with a model whose embedding layer is non-standard.

        Builds a tiny fake model/predictor (no serialized archive needed) and
        checks the usual attack-output invariants.
        """
        inputs = {"sentence": "I always write unit tests for my code"}
        # Split once and reuse: the identity comprehension around split() was redundant.
        tokens = inputs["sentence"].split(" ")
        vocab = Vocabulary()
        vocab.add_tokens_to_namespace(tokens)
        model = FakeModelForTestingInterpret(vocab, max_tokens=len(tokens))
        predictor = FakePredictorForTestingInterpret(
            model, TextClassificationJsonReader())

        hotflipper = Hotflip(predictor)
        hotflipper.initialize()
        attack = hotflipper.attack_from_json(inputs, "tokens", "grad_input_1")
        assert attack is not None
        assert "final" in attack
        assert "original" in attack
        assert "outputs" in attack
        assert len(attack["final"][0]) == len(
            attack["original"])  # hotflip replaces words without removing
Exemplo n.º 8
0
    def test_hotflip(self):
        """Hotflip attack on a textual-entailment model produces well-formed output."""
        inputs = {
            "premise": "I always write unit tests for my code.",
            "hypothesis": "One time I didn't write any unit tests for my code."
        }

        model_path = (self.FIXTURES_ROOT / 'decomposable_attention' /
                      'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(load_archive(model_path), 'textual-entailment')

        attacker = Hotflip(predictor)
        attacker.initialize()
        # Attack the hypothesis field via its input gradient.
        result = attacker.attack_from_json(inputs, 'hypothesis', 'grad_input_1')

        assert result is not None
        for key in ('final', 'original', 'outputs'):
            assert key in result
        # Hotflip replaces words without removing any, so the lengths agree.
        assert len(result['final'][0]) == len(result['original'])
Exemplo n.º 9
0
    def test_hotflip(self):
        """Hotflip on the decomposable-attention entailment fixture returns valid output."""
        inputs = {
            "premise": "I always write unit tests for my code.",
            "hypothesis": "One time I didn't write any unit tests for my code.",
        }

        archive = load_archive(
            self.FIXTURES_ROOT / "decomposable_attention" / "serialization" / "model.tar.gz"
        )
        predictor = Predictor.from_archive(archive, "textual-entailment")

        attacker = Hotflip(predictor)
        attacker.initialize()
        result = attacker.attack_from_json(inputs, "hypothesis", "grad_input_1")

        assert result is not None
        assert "final" in result and "original" in result and "outputs" in result
        # Substitution-only attack: token counts before and after must match.
        assert len(result["final"][0]) == len(result["original"])
Exemplo n.º 10
0
    def test_with_token_characters_indexer(self):
        """Hotflip should handle a character indexer whose embedder is an EmptyEmbedder."""
        inputs = {"sentence": "I always write unit tests for my code."}

        model_path = self.FIXTURES_ROOT / "basic_classifier" / "serialization" / "model.tar.gz"
        predictor = Predictor.from_archive(load_archive(model_path))
        # Wire in a char-level indexer; its embedder deliberately produces nothing.
        predictor._dataset_reader._token_indexers["chars"] = TokenCharactersIndexer(
            min_padding_length=1
        )
        predictor._model._text_field_embedder._token_embedders["chars"] = EmptyEmbedder()

        attacker = Hotflip(predictor)
        attacker.initialize()
        result = attacker.attack_from_json(inputs, "tokens", "grad_input_1")

        assert result is not None
        for key in ("final", "original", "outputs"):
            assert key in result
        # Hotflip replaces words without removing any, so the lengths agree.
        assert len(result["final"][0]) == len(result["original"])
Exemplo n.º 11
0
    def test_hotflip(self):
        """Hotflip attack on an entailment model, then on a SQuAD/BiDAF model.

        The second half exercises a model with a different instance-equality
        method and checks that flipping non-ignored question tokens moves the
        predicted answer span.
        """
        inputs = {
            "premise": "I always write unit tests for my code.",
            "hypothesis": "One time I didn't write any unit tests for my code."
        }

        archive = load_archive(self.FIXTURES_ROOT / 'decomposable_attention' /
                               'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'textual-entailment')

        hotflipper = Hotflip(predictor)
        hotflipper.initialize()
        attack = hotflipper.attack_from_json(inputs, 'hypothesis',
                                             'grad_input_1')
        assert attack is not None
        assert 'final' in attack
        assert 'original' in attack
        assert 'outputs' in attack
        assert len(attack['final'][0]) == len(
            attack['original'])  # hotflip replaces words without removing

        # test using SQuAD model (tests different equals method)
        inputs = {
            "question":
            "OMG, I heard you coded a test that succeeded on its first attempt, is that true?",
            "passage":
            "Bro, never doubt a coding wizard! I am the king of software, MWAHAHAHA"
        }

        archive = load_archive(self.FIXTURES_ROOT / 'bidaf' / 'serialization' /
                               'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'machine-comprehension')

        hotflipper = Hotflip(predictor)
        hotflipper.initialize()
        # Punctuation and the null token must never be flipped by the attack.
        ignore_tokens = ["@@NULL@@", '.', ',', ';', '!', '?']
        # Attack the question; "grad_input_2" selects the gradient w.r.t. the question input.
        attack = hotflipper.attack_from_json(inputs, 'question',
                                             'grad_input_2')
        assert attack is not None
        assert 'final' in attack
        assert 'original' in attack
        assert 'outputs' in attack
        assert len(attack['final'][0]) == len(
            attack['original'])  # hotflip replaces words without removing

        instance = predictor._json_to_instance(inputs)
        assert instance['question'] != attack['final'][
            0]  # check that the input has changed.

        # Recompute the model's original prediction to recover the pre-attack answer span.
        outputs = predictor._model.forward_on_instance(instance)
        original_labeled_instance = predictor.predictions_to_labeled_instances(
            instance, outputs)[0]
        original_span_start = original_labeled_instance[
            'span_start'].sequence_index
        original_span_end = original_labeled_instance[
            'span_end'].sequence_index

        # Post-attack answer span reported by the attacker.
        flipped_span_start = attack['outputs']['best_span'][0]
        flipped_span_end = attack['outputs']['best_span'][1]

        for token in instance['question']:
            token = str(token)
            if token in ignore_tokens:
                assert token in attack['final'][
                    0]  # ignore tokens should not be changed
            # HotFlip keeps changing tokens until either the predictions changes or all tokens have
            # been changed. If there are tokens in the HotFlip final result that were in the original
            # (i.e., not all tokens were flipped), then the prediction should be different.
            else:
                # NOTE(review): this is a membership test, not a positional comparison —
                # a token flipped elsewhere but present at another index still counts as
                # "unchanged". A positional `token == attack['final'][0][i]` check (as in
                # the enumerate-based variant of this test) would be stricter; confirm
                # whether membership is intended here.
                if token in attack['final'][0]:
                    assert original_span_start != flipped_span_start or original_span_end != flipped_span_end