Exemplo n.º 1
0
"""Spacy Operation."""
from __future__ import annotations

from typing import List

import torch
from meerkat import SpacyCell
from meerkat.tools.lazy_loader import LazyLoader

from robustnessgym.core.operation import Operation
from robustnessgym.core.slice import SliceDataPanel as DataPanel

spacy = LazyLoader("spacy", warning="Please `pip install spacy`.")
spacy_tokens = LazyLoader("spacy.tokens")
nc = LazyLoader(
    "neuralcoref",
    error="Can't import neuralcoref. Please install neuralcoref using:\n"
    "git clone https://github.com/huggingface/neuralcoref.git\n"
    "cd neuralcoref\n"
    "pip install -r requirements.txt\n"
    "pip install -e .",
)


class SpacyOp(Operation):
    """Operation that runs the Spacy pipeline."""
    def __init__(
        self,
        lang: str = "en_core_web_sm",
        nlp: spacy.language.Language = None,
        neuralcoref: bool = False,
Exemplo n.º 2
0
from typing import List

import cytoolz as tz
import torch
from meerkat.tools.lazy_loader import LazyLoader

from robustnessgym.core.identifier import Identifier
from robustnessgym.slicebuilders.transformation import SingleColumnTransformation

fastBPE = LazyLoader("fastBPE",
                     error="Install fastBPE with `pip install fastBPE`.")


class FairseqBacktranslation(SingleColumnTransformation):
    """Backtranslation using torchhub fairseq models."""
    def __init__(
        self,
        n_src2tgt: int = 1,
        n_tgt2src: int = 1,
        langs: str = "en2de",
        torchhub_dir: str = None,
        device: str = "cuda",
        src2tgt_topk: int = 1000,
        src2tgt_temp: float = 1.0,
        tgt2src_topk: int = 1000,
        tgt2src_temp: float = 1.0,
    ):

        super(FairseqBacktranslation,
              self).__init__(identifiers=Identifier.range(
                  n=n_src2tgt * n_tgt2src,
Exemplo n.º 3
0
import cytoolz as tz
import torch
from meerkat.tools.lazy_loader import LazyLoader
from transformers import (
    AutoModel,
    AutoModelForQuestionAnswering,
    AutoModelForSeq2SeqLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
)

from robustnessgym.core.metrics import compute_metric
from robustnessgym.core.slice import SliceDataPanel as DataPanel
from robustnessgym.tasks.task import Task

ludwig_api = LazyLoader("ludwig.api")
nltk = LazyLoader("nltk")


class Model:
    def __init__(
        self,
        identifier: str,
        task: Task,
        model=None,
        evaluation_fn=None,
        device: str = None,
        is_classifier: bool = None,
    ):

        # TODO(karan): improve this wrapper around models
Exemplo n.º 4
0
from functools import partial
from types import SimpleNamespace

import numpy as np
from meerkat.tools.lazy_loader import LazyLoader

scipy_optimize = LazyLoader("scipy.optimize")
scipy_special = LazyLoader("scipy.special")
skmetrics = LazyLoader("sklearn.metrics.pairwise")


def Phi(D, edge_list: list = None):
    """Build the potential matrix for an n x d (example, slice) matrix.

    Args:
        D: 2-D NumPy array with one row per example and one column per slice.
        edge_list: optional collection of column-index pairs, given either as
            a list of ``(i, j)`` pairs or as an integer array with one pair
            per row. Each pair contributes one extra column holding the
            elementwise product of columns ``i`` and ``j`` of `D`.

    Returns:
        `D` itself when `edge_list` is None. Otherwise a new array made of
        `D` followed by one pairwise-product column per edge.
    """
    if edge_list is None:
        return D

    # The signature advertises a plain list, but the column slicing below
    # needs an array, so normalise the input to accept both forms.
    edges = np.asarray(edge_list)
    if edges.size == 0:
        # An empty list converts to a 1-D float array that can neither be
        # sliced by column nor used as an index; substitute a well-formed
        # empty set of pairs so no extra columns are appended.
        edges = np.empty((0, 2), dtype=np.intp)

    # Select the two endpoint columns of every edge and multiply them
    # elementwise: one (n,) product column per edge.
    pairwise_terms = D[:, edges[:, 0]] * D[:, edges[:, 1]]
    return np.concatenate([D, pairwise_terms], axis=1)


def log_partition_ratio(x, Phi_D_src, n_src):
    """Return log(n_src) minus the log-sum-exp of `Phi_D_src.dot(x)`.

    This is the log-partition ratio term of the KLIEP problem.
    """
    log_n_src = np.log(n_src)
    source_scores = Phi_D_src.dot(x)
    return log_n_src - scipy_special.logsumexp(source_scores)

Exemplo n.º 5
0
import re
import statistics
from typing import Callable, Sequence, Union

import numpy as np
import torch
from meerkat.tools.lazy_loader import LazyLoader
from sklearn.metrics import accuracy_score, f1_score

nltk = LazyLoader("nltk")
rouge_score = LazyLoader("rouge_score")


def get_metric(name: str) -> Callable:
    """Look up a metric function by its string name.

    Recognized names are "accuracy", "f1", "f1_micro" and "f1_macro"; each
    maps to the module-level function of the same name.

    Raises:
        NotImplementedError: if `name` is not one of the recognized names.
    """
    if name == "accuracy":
        return accuracy
    if name == "f1":
        return f1
    if name == "f1_micro":
        return f1_micro
    if name == "f1_macro":
        return f1_macro

    raise NotImplementedError(f"Metric name {name} not recognized.")


def accuracy(
    predictions: Union[list, np.array, torch.Tensor],
    labels: Union[list, np.array, torch.Tensor],
):
from typing import List

import fuzzywuzzy.fuzz as fuzz
import numpy as np
from meerkat.tools.lazy_loader import LazyLoader

from robustnessgym.core.decorators import prerequisites
from robustnessgym.core.identifier import Identifier
from robustnessgym.core.operation import lookup
from robustnessgym.core.slice import SliceDataPanel as DataPanel
from robustnessgym.ops.allen import AllenConstituencyParsingOp
from robustnessgym.slicebuilders.subpopulations.score import ScoreSubpopulation

nltk = LazyLoader("nltk")


@prerequisites(AllenConstituencyParsingOp)
class ConstituencyOverlapSubpopulation(ScoreSubpopulation):
    def score(
        self,
        batch: DataPanel,
        columns: List[str],
        *args,
        **kwargs,
    ) -> np.ndarray:
        # Require that the number of keys is exactly 2
        assert len(columns) == 2, "Must specify exactly 2 keys."

        # Retrieve the trees
        trees = {
            col: lookup(batch, AllenConstituencyParsingOp, [col]) for col in columns
Exemplo n.º 7
0
"""Easy data augmentation techniques for text classification. Jason Wei and Kai
Zou.

Taken from https://github.com/jasonwei20/eda_nlp
"""

import random
import re
from random import shuffle

from meerkat.tools.lazy_loader import LazyLoader

nltk_corpus = LazyLoader("nltk.corpus")

random.seed(1)

# stop words list
stop_words = [
    "i",
    "me",
    "my",
    "myself",
    "we",
    "our",
    "ours",
    "ourselves",
    "you",
    "your",
    "yours",
    "yourself",
    "yourselves",
Exemplo n.º 8
0
from collections import OrderedDict
from typing import List, Tuple

import cytoolz as tz
import numpy as np
from meerkat.tools.lazy_loader import LazyLoader

from robustnessgym.core.identifier import Identifier
from robustnessgym.core.model import Model
from robustnessgym.core.slice import SliceDataPanel as DataPanel
from robustnessgym.slicebuilders.attack import Attack

attack_recipes = LazyLoader(
    "textattack.attack_recipes",
    error="Install TextAttack with `pip install textattack`.",
)
wrappers = LazyLoader("textattack.models.wrappers")


class TextAttack(Attack):
    """Class for TextAttack."""
    def __init__(
        self,
        attack: "attack_recipes.AttackRecipe",
    ):
        super(TextAttack, self).__init__(
            identifiers=[Identifier(
                self.__class__.__name__,
                attack=attack,
            )], )
Exemplo n.º 9
0
from typing import Dict, List, Tuple

import numpy as np
from meerkat.tools.lazy_loader import LazyLoader

from robustnessgym.core.identifier import Identifier
from robustnessgym.core.tools import transpose_batch
from robustnessgym.slicebuilders.attack import Attack

morpheus = LazyLoader("morpheus")


class Morpheus(Attack):
    def __init__(
        self,
        dataset: str,
        model: str,
        constrain_pos: bool = True,
        **kwargs,
    ):
        super().__init__(identifiers=[
            Identifier(self.__class__.__name__, dataset=dataset, model=model)
        ], )

        self.constrain_pos = constrain_pos

        self.dataset = dataset.lower()
        if self.dataset == "mnli":
            self.attack = morpheus.MorpheusHuggingfaceNLI(model)
        elif "squad" in self.dataset:
            is_squad2 = "2" in self.dataset
Exemplo n.º 10
0
from __future__ import annotations

from typing import Dict, List

from meerkat import AbstractCell
from meerkat.tools.lazy_loader import LazyLoader

from robustnessgym.core.operation import Operation
from robustnessgym.core.slice import SliceDataPanel as DataPanel

stanza = LazyLoader("stanza", error="Please `pip install stanza`.")


class StanzaCell(AbstractCell):
    """Cell that stores a Stanza Document."""
    def __init__(self, doc: stanza.Document, *args, **kwargs):
        """Initialize the parent cell with the extra arguments and keep `doc`."""
        super().__init__(*args, **kwargs)
        self.doc = doc

    def get(self, *args, **kwargs):
        """Return the stored document; any arguments passed in are ignored."""
        return self.doc

    @property
    def data(self) -> stanza.Document:
        """The stored document (the same object as `self.doc`)."""
        return self.doc

    @classmethod
    def from_text(cls, text: str, pipeline: stanza.Pipeline) -> StanzaCell:
        """Call `pipeline` on `text` and wrap the result in a new cell."""
        document = pipeline(text)
        return cls(document)

    def get_state(self) -> Dict:
Exemplo n.º 11
0
"""Transformations using nlpaug."""
from typing import List

from meerkat.tools.lazy_loader import LazyLoader

from robustnessgym.core.identifier import Identifier
from robustnessgym.slicebuilders.transformation import SingleColumnTransformation

nlpaug_flow = LazyLoader("nlpaug.flow", error="Please `pip install nlpaug`.")


class NlpAugTransformation(SingleColumnTransformation):
    """Class for building transformations using nlpaug."""
    def __init__(self,
                 pipeline: "nlpaug_flow.Pipeline",
                 num_transformed: int = 1,
                 identifiers: List[Identifier] = None,
                 *args,
                 **kwargs):
        assert isinstance(pipeline, nlpaug_flow.Pipeline), (
            "`pipeline` must be an nlpaug Pipeline object. Please use \n"
            "from nlpaug.flow import Sequential\n"
            "rg.NlpAugTransformation(pipeline=Sequential(flow=[...])).")

        super(NlpAugTransformation,
              self).__init__(num_transformed=num_transformed,
                             identifiers=Identifier.range(
                                 n=num_transformed,
                                 _name=self.__class__.__name__,
                                 pipeline=[
                                     Identifier(
Exemplo n.º 12
0
from typing import List

from meerkat.tools.lazy_loader import LazyLoader

from robustnessgym.core.operation import Operation
from robustnessgym.core.slice import SliceDataPanel as DataPanel
from robustnessgym.mixins.device import DeviceMixin

predictors = LazyLoader("allennlp.predictors")


class AllenPredictionOp(DeviceMixin, Operation):
    def __init__(
        self,
        path: str,
        device: str,
    ):
        """Load a predictor from the archive at `path`.

        Args:
            path: passed to `Predictor.from_path` as `archive_path`.
            device: forwarded to the parent initializer.
        """
        super().__init__(device=device)

        # NOTE(review): `self.cuda_device` is presumably set up by the parent
        # initializer (DeviceMixin) from `device` -- confirm.
        self._predictor = predictors.Predictor.from_path(
            archive_path=path,
            cuda_device=self.cuda_device,
        )

    @property
    def predictor(self):
        """The predictor object created in `__init__` (read-only accessor)."""
        return self._predictor

    def process_batch(
        self,
        dp: DataPanel,
        columns: List[str],
        **kwargs,
Exemplo n.º 13
0
import hashlib
import inspect
import json
from collections import defaultdict
from functools import partial
from typing import Callable, Dict, List, Mapping, Optional, Sequence

import cytoolz as tz
import numpy as np
import progressbar
import torch
import yaml
from meerkat import ImagePath
from meerkat.tools.lazy_loader import LazyLoader

PIL = LazyLoader("PIL")


def save_image(image, filename):
    """Save `image` to `filename` as an RGB image and return an ImagePath.

    Args:
        image: a NumPy array or a `torch.Tensor` of pixel values. The values
            are cast to uint8 before being handed to `PIL.Image.fromarray`.
            NOTE(review): assumes a layout that `fromarray` accepts
            (e.g. H x W or H x W x C) -- confirm against callers.
        filename: destination passed to the PIL image's `save` method.

    Returns:
        An `ImagePath` constructed from `filename`.
    """
    if isinstance(image, torch.Tensor):
        # `Tensor.numpy()` raises for tensors that require grad or that live
        # on a non-CPU device, so detach and move to host memory first. For a
        # plain CPU tensor this yields the same array as before.
        image = image.detach().cpu().numpy()

    pil_image = PIL.Image.fromarray(image.astype(np.uint8))
    if pil_image.mode != "RGB":
        pil_image = pil_image.convert("RGB")
    pil_image.save(filename)

    return ImagePath(filename)