Example #1
    def test_sce_equals_ce(self):
        # Does soft ce loss match classic ce loss when labels are one-hot?
        Y_golds = torch.LongTensor([0, 1, 2])
        Y_golds_probs = torch.Tensor(
            preds_to_probs(Y_golds.numpy(), num_classes=4))

        Y_probs = torch.rand_like(Y_golds_probs)
        Y_probs = Y_probs / Y_probs.sum(dim=1).reshape(-1, 1)

        ce_loss = F.cross_entropy(Y_probs, Y_golds, reduction="none")
        ces_loss = cross_entropy_with_probs(Y_probs,
                                            Y_golds_probs,
                                            reduction="none")
        np.testing.assert_equal(ce_loss.numpy(), ces_loss.numpy())

        ce_loss = F.cross_entropy(Y_probs, Y_golds, reduction="sum")
        ces_loss = cross_entropy_with_probs(Y_probs,
                                            Y_golds_probs,
                                            reduction="sum")
        np.testing.assert_equal(ce_loss.numpy(), ces_loss.numpy())

        ce_loss = F.cross_entropy(Y_probs, Y_golds, reduction="mean")
        ces_loss = cross_entropy_with_probs(Y_probs,
                                            Y_golds_probs,
                                            reduction="mean")
        np.testing.assert_equal(ce_loss.numpy(), ces_loss.numpy())
Example #2
    def test_roc_auc(self):
        golds = np.array([0, 0, 0, 0, 1])
        probs = preds_to_probs(golds, 2)
        probs_nonbinary = np.array([
            [1.0, 0.0, 0.0],
            [0.7, 0.0, 0.3],
            [0.8, 0.0, 0.2],
            [1.0, 0.0, 0.0],
            [0.0, 1.0, 0.0],
        ])

        roc_auc = metric_score(golds,
                               preds=None,
                               probs=probs,
                               metric="roc_auc")
        self.assertAlmostEqual(roc_auc, 1.0)
        probs = np.fliplr(probs)
        roc_auc = metric_score(golds,
                               preds=None,
                               probs=probs,
                               metric="roc_auc")
        self.assertAlmostEqual(roc_auc, 0.0)

        with self.assertRaisesRegex(
                ValueError,
                "Metric roc_auc is currently only defined for binary"):
            metric_score(golds,
                         preds=None,
                         probs=probs_nonbinary,
                         metric="roc_auc")
Example #3
def train_model_from_probs(df_train_filtered, probs_train_filtered, df_valid,
                           df_test):
    set_seeds()

    vectorizer = modeler.vectorizer
    X_train = vectorizer.fit_transform(df_train_filtered.text.tolist())

    X_valid = vectorizer.transform(df_valid["text"].tolist())
    X_test = vectorizer.transform(df_test["text"].tolist())

    Y_valid = df_valid["label"].values
    Y_test = df_test["label"].values

    # Define a vanilla logistic regression model with Keras
    keras_model = get_keras_logreg(input_dim=X_train.shape[1])

    keras_model.fit(
        x=X_train,
        y=probs_train_filtered,
        validation_data=(X_valid, preds_to_probs(Y_valid, 2)),
        callbacks=[get_keras_early_stopping()],
        epochs=50,
        verbose=0,
    )

    modeler.keras_model = keras_model

    preds_test = keras_model.predict(x=X_test).argmax(axis=1)

    stats = modeler.get_stats(modeler.Y_test, preds_test)

    update_stats({**stats, "data": "test"}, "train_model")

    return stats
Example #4
    def test_invalid_reduction(self):
        Y_golds = torch.LongTensor([0, 1, 2])
        Y_golds_probs = torch.Tensor(
            preds_to_probs(Y_golds.numpy(), num_classes=4))

        Y_probs = torch.rand_like(Y_golds_probs)
        Y_probs = Y_probs / Y_probs.sum(dim=1).reshape(-1, 1)

        with self.assertRaisesRegex(ValueError, "Keyword 'reduction' must be"):
            cross_entropy_with_probs(Y_probs, Y_golds_probs, reduction="bad")
Example #5
    def test_perfect_predictions(self):
        # Does soft ce loss achieve approx. 0 loss with perfect predictions?
        Y_golds = torch.LongTensor([0, 1, 2])
        Y_golds_probs = torch.Tensor(
            preds_to_probs(Y_golds.numpy(), num_classes=4))

        Y_probs = Y_golds_probs.clone()
        Y_probs[Y_probs == 1] = 100
        Y_probs[Y_probs == 0] = -100

        ces_loss = cross_entropy_with_probs(Y_probs, Y_golds_probs)
        np.testing.assert_equal(ces_loss.numpy(), 0)
Example #6
    def test_score_slices(self):
        DATA = [5, 10, 19, 22, 25]

        @slicing_function()
        def sf(x):
            return x.num < 20

        # We expect 3/5 correct -> 0.6 accuracy
        golds = np.array([0, 1, 0, 1, 0])
        preds = np.array([0, 0, 0, 0, 0])
        probs = preds_to_probs(preds, 2)

        # In the slice, we expect the last 2 elements to be masked
        # We expect 2/3 correct -> 0.666 accuracy
        data = [SimpleNamespace(num=x) for x in DATA]
        S = SFApplier([sf]).apply(data)
        scorer = Scorer(metrics=["accuracy"])

        # Test normal score
        metrics = scorer.score(golds=golds, preds=preds, probs=probs)
        self.assertEqual(metrics["accuracy"], 0.6)

        # Test score_slices
        slice_metrics = scorer.score_slices(S=S,
                                            golds=golds,
                                            preds=preds,
                                            probs=probs)
        self.assertEqual(slice_metrics["overall"]["accuracy"], 0.6)
        self.assertEqual(slice_metrics["sf"]["accuracy"], 2.0 / 3.0)

        # Test as_dataframe=True
        metrics_df = scorer.score_slices(S=S,
                                         golds=golds,
                                         preds=preds,
                                         probs=probs,
                                         as_dataframe=True)
        self.assertTrue(isinstance(metrics_df, pd.DataFrame))
        self.assertEqual(metrics_df["accuracy"]["overall"], 0.6)
        self.assertEqual(metrics_df["accuracy"]["sf"], 2.0 / 3.0)

        # Test wrong shapes
        with self.assertRaisesRegex(ValueError,
                                    "must have the same number of elements"):
            scorer.score_slices(S=S,
                                golds=golds[:1],
                                preds=preds,
                                probs=probs,
                                as_dataframe=True)
Example #7
    def test_loss_weights(self):
        FACTOR = 10

        # Do class weights work as expected?
        Y_golds = torch.LongTensor([0, 0, 1])
        Y_golds_probs = torch.Tensor(
            preds_to_probs(Y_golds.numpy(), num_classes=3))
        # Predict [1, 1, 1]
        Y_probs = torch.tensor([
            [-100.0, 100.0, -100.0],
            [-100.0, 100.0, -100.0],
            [-100.0, 100.0, -100.0],
        ])

        ces_loss0 = cross_entropy_with_probs(Y_probs, Y_golds_probs).numpy()
        weight1 = torch.FloatTensor([1, 1, 1])
        ces_loss1 = cross_entropy_with_probs(Y_probs,
                                             Y_golds_probs,
                                             weight=weight1).numpy()
        # Do weights of 1 match no weights at all?
        self.assertEqual(ces_loss0, ces_loss1)

        weight2 = torch.FloatTensor([1, 2, 1])
        ces_loss2 = cross_entropy_with_probs(Y_probs,
                                             Y_golds_probs,
                                             weight=weight2).numpy()
        weight3 = weight2 * FACTOR
        ces_loss3 = cross_entropy_with_probs(Y_probs,
                                             Y_golds_probs,
                                             weight=weight3).numpy()
        # If weights are X times larger, is loss X times larger?
        self.assertAlmostEqual(ces_loss2 * FACTOR, ces_loss3, places=3)

        # Note that PyTorch's cross-entropy loss has the unusual behavior that weights
        # behave differently when losses are averaged inside vs. outside the function.
        # See https://github.com/pytorch/pytorch/issues/8062 for details.
        ce_loss3 = (F.cross_entropy(Y_probs,
                                    Y_golds,
                                    weight=weight3,
                                    reduction="none").mean().numpy())
        # Do hard and soft ce loss still match when we use class weights?
        self.assertAlmostEqual(ce_loss3, ces_loss3, places=3)
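
The comment above about PyTorch's weight handling can be checked in isolation. A minimal standalone sketch (assuming only torch is installed; the tensors are illustrative, not taken from the test above) showing that with non-uniform class weights, averaging inside F.cross_entropy divides by the sum of the selected weights, while reduction="none" followed by .mean() divides by the number of examples:

import torch
import torch.nn.functional as F

logits = torch.tensor([[2.0, 0.0], [0.0, 2.0]])
targets = torch.LongTensor([0, 1])
weight = torch.FloatTensor([1.0, 3.0])

# Weighted mean: sum(w_y * loss_i) / sum(w_y)
inside = F.cross_entropy(logits, targets, weight=weight, reduction="mean")
# Plain mean of the weighted per-example losses: sum(w_y * loss_i) / N
outside = F.cross_entropy(logits, targets, weight=weight, reduction="none").mean()
print(inside.item(), outside.item())  # ~0.127 vs ~0.254 -- the two reductions differ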
Example #8
    def train(self):
        probs_train = self.label_model.predict_proba(L=self.L_train)

        df_train_filtered, probs_train_filtered = filter_unlabeled_dataframe(
            X=self.df_train, y=probs_train, L=self.L_train)

        if len(df_train_filtered) == 0:
            print("Labeling functions cover none of the training examples!",
                  file=sys.stderr)
            return {"micro_f1": 0}

        #from tensorflow.keras.utils import to_categorical
        #df_train_filtered, probs_train_filtered = self.df_dev, to_categorical(self.df_dev["label"].values)

        vectorizer = self.vectorizer
        X_train = vectorizer.transform(df_train_filtered.text.tolist())

        X_dev = vectorizer.transform(self.df_dev.text.tolist())
        X_valid = vectorizer.transform(self.df_valid.text.tolist())
        X_test = vectorizer.transform(self.df_test.text.tolist())

        self.keras_model = get_keras_logreg(input_dim=X_train.shape[1])

        self.keras_model.fit(
            x=X_train,
            y=probs_train_filtered,
            validation_data=(X_valid, preds_to_probs(self.Y_valid, 2)),
            callbacks=[get_keras_early_stopping()],
            epochs=20,
            verbose=0,
        )

        preds_test = self.keras_model.predict(x=X_test).argmax(axis=1)

        #return preds_test
        return self.get_stats(self.Y_test, preds_test)
Example #9
    def test_preds_to_probs(self):
        np.testing.assert_array_equal(preds_to_probs(PREDS, 2), PREDS_ROUND)
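
For reference, preds_to_probs (the snorkel.utils helper exercised throughout these examples) maps hard label predictions to one-hot probability rows; PREDS and PREDS_ROUND above are fixtures defined elsewhere in the test module. A minimal illustrative sketch:

import numpy as np
from snorkel.utils import preds_to_probs

preds = np.array([1, 0, 1])
# One one-hot row per prediction, with num_classes columns
print(preds_to_probs(preds, 2))
# [[0. 1.]
#  [1. 0.]
#  [0. 1.]]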
Example #10
tf.compat.v1.set_random_seed(seed)
sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf)
K.set_session(sess)

# %% {"tags": ["md-exclude-output"]}
from snorkel.analysis import metric_score
from snorkel.utils import preds_to_probs
from utils import get_keras_logreg, get_keras_early_stopping

# Define a vanilla logistic regression model with Keras
keras_model = get_keras_logreg(input_dim=X_train.shape[1])

keras_model.fit(
    x=X_train,
    y=probs_train_filtered,
    validation_data=(X_valid, preds_to_probs(Y_valid, 2)),
    callbacks=[get_keras_early_stopping()],
    epochs=50,
    verbose=0,
)

# %%
preds_test = keras_model.predict(x=X_test).argmax(axis=1)
test_acc = metric_score(golds=Y_test, preds=preds_test, metric="accuracy")
print(f"Test Accuracy: {test_acc * 100:.1f}%")

# %% [markdown]
# **We observe an additional boost of several points in accuracy over the `LabelModel`!
# By using the label model to transfer the domain knowledge encoded in our LFs to the discriminative model,
# we were able to generalize beyond the noisy labeling heuristics.**
Example #11
X_test, Y_test = df_to_features(vectorizer, df_test, "test")

# %% [markdown]
# We define a `LogisticRegression` model from `sklearn`.

# %%
from sklearn.linear_model import LogisticRegression

sklearn_model = LogisticRegression(C=0.001, solver="liblinear")
sklearn_model.fit(X=X_train, y=Y_train)

# %%
from snorkel.utils import preds_to_probs

preds_test = sklearn_model.predict(X_test)
probs_test = preds_to_probs(preds_test, 2)

# %%
from sklearn.metrics import f1_score

print(f"Test set F1: {100 * f1_score(Y_test, preds_test):.1f}%")

# %% [markdown]
# ### Store slice metadata in `S`

# %% [markdown]
# We apply our list of `sfs` to the data using an SF applier.
# For our data format, we leverage the [`PandasSFApplier`](https://snorkel.readthedocs.io/en/master/packages/_autosummary/slicing/snorkel.slicing.PandasSFApplier.html#snorkel.slicing.PandasSFApplier).
# The output of the `applier` is an [`np.recarray`](https://docs.scipy.org/doc/numpy/reference/generated/numpy.recarray.html) which stores vectors in named fields indicating whether each of $n$ data points belongs to the corresponding slice.

# %% {"tags": ["md-exclude-output"]}
Example #12
def slicing_evaluation(df_train, df_test, train_model=None):
    if train_model is None:
        train_model = "mlp"

    sfs = [
        SlicingFunction.short_comment, SlicingFunction.ind_keyword,
        SlicingFunction.cmp_re, SlicingFunction.industry_keyword
    ]

    slice_names = [sf.name for sf in sfs]
    scorer = Scorer(metrics=["f1"])

    ft = FT.load(f"{WORK_PATH}/snorkel_flow/sources/fasttext_name_model.bin")

    def get_ftr(text):
        return ft.get_sentence_vector(' '.join(jieba.lcut(text.strip())))

    X_train = np.array(list(df_train.text.apply(get_ftr).values))
    X_test = np.array(list(df_test.text.apply(get_ftr).values))
    Y_train = df_train.label.values
    Y_test = df_test.label.values

    if train_model == "lr":
        sklearn_model = LogisticRegression(C=0.001, solver="liblinear")
        sklearn_model.fit(X=X_train, y=Y_train)
        preds_test = sklearn_model.predict(X_test)
        probs_test = preds_to_probs(
            preds_test,
            len([c for c in dir(Polarity) if not c.startswith("__")]))
        print(f"Test set F1: {100 * f1_score(Y_test, preds_test):.1f}%")
        applier = PandasSFApplier(sfs)
        S_test = applier.apply(df_test)
        analysis = scorer.score_slices(S=S_test,
                                       golds=Y_test,
                                       preds=preds_test,
                                       probs=probs_test,
                                       as_dataframe=True)
        return analysis

    if train_model == "mlp":
        # Define model architecture
        bow_dim = X_train.shape[1]
        hidden_dim = bow_dim
        mlp = get_pytorch_mlp(hidden_dim=hidden_dim, num_layers=2)

        # Initialize slice model
        slice_model = SliceAwareClassifier(
            base_architecture=mlp,
            head_dim=hidden_dim,
            slice_names=slice_names,
            scorer=scorer,
        )

        # generate the remaining S matrices with the new set of slicing functions
        applier = PandasSFApplier(sfs)
        S_train = applier.apply(df_train)
        S_test = applier.apply(df_test)

        # add slice labels to an existing dataloader
        BATCH_SIZE = 64

        train_dl = create_dict_dataloader(X_train, Y_train, "train")
        train_dl_slice = slice_model.make_slice_dataloader(
            train_dl.dataset, S_train, shuffle=True, batch_size=BATCH_SIZE)
        test_dl = create_dict_dataloader(X_test, Y_test, "train")
        test_dl_slice = slice_model.make_slice_dataloader(
            test_dl.dataset, S_test, shuffle=False, batch_size=BATCH_SIZE)

        #  fit our classifier with the training set dataloader
        trainer = Trainer(n_epochs=2, lr=1e-4, progress_bar=True)
        trainer.fit(slice_model, [train_dl_slice])

        analysis = slice_model.score_slices([test_dl_slice], as_dataframe=True)
        return analysis
Example #13
from tensorflow.keras import backend as K

tf.compat.v1.set_random_seed(seed)
sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf)
K.set_session(sess)

from snorkel.analysis import metric_score
from snorkel.utils import preds_to_probs
from utils import get_keras_logreg, get_keras_early_stopping

keras_model = get_keras_logreg(input_dim=X_train.shape[1])

keras_model.fit(
    x=X_train,
    y=probs_train_filtered,
    validation_data=(X_valid, preds_to_probs(Y_valid, 2)),
    callbacks=[get_keras_early_stopping()],
    epochs=20,
    verbose=0,
)

preds_test = keras_model.predict(x=X_test).argmax(axis=1)
test_acc = metric_score(golds=Y_test, preds=preds_test, metric="accuracy")
print(f"Test Accuracy: {test_acc * 100:.1f}%")

keras_dev_model = get_keras_logreg(input_dim=X_train.shape[1], output_dim=1)

keras_dev_model.fit(
    x=X_dev,
    y=Y_dev,
    validation_data=(X_valid, Y_valid),