def test_sce_equals_ce(self):
    # Does soft ce loss match classic ce loss when labels are one-hot?
    Y_golds = torch.LongTensor([0, 1, 2])
    Y_golds_probs = torch.Tensor(preds_to_probs(Y_golds.numpy(), num_classes=4))
    Y_probs = torch.rand_like(Y_golds_probs)
    Y_probs = Y_probs / Y_probs.sum(dim=1).reshape(-1, 1)

    ce_loss = F.cross_entropy(Y_probs, Y_golds, reduction="none")
    ces_loss = cross_entropy_with_probs(Y_probs, Y_golds_probs, reduction="none")
    np.testing.assert_equal(ce_loss.numpy(), ces_loss.numpy())

    ce_loss = F.cross_entropy(Y_probs, Y_golds, reduction="sum")
    ces_loss = cross_entropy_with_probs(Y_probs, Y_golds_probs, reduction="sum")
    np.testing.assert_equal(ce_loss.numpy(), ces_loss.numpy())

    ce_loss = F.cross_entropy(Y_probs, Y_golds, reduction="mean")
    ces_loss = cross_entropy_with_probs(Y_probs, Y_golds_probs, reduction="mean")
    np.testing.assert_equal(ce_loss.numpy(), ces_loss.numpy())
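# For reference, a minimal sketch of the soft cross-entropy that the test above
# exercises (a sketch, not necessarily Snorkel's exact implementation): treat
# the target distribution as per-class weights on the ordinary hard CE loss.
# With one-hot targets this reduces exactly to F.cross_entropy, which is the
# equality being asserted.
import torch
import torch.nn.functional as F

def soft_cross_entropy_sketch(logits, target_probs, reduction="mean"):
    n, k = logits.shape
    losses = logits.new_zeros(n)
    for y in range(k):
        hard_targets = logits.new_full((n,), y, dtype=torch.long)
        losses += target_probs[:, y] * F.cross_entropy(
            logits, hard_targets, reduction="none"
        )
    if reduction == "none":
        return losses
    if reduction == "sum":
        return losses.sum()
    return losses.mean()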
def test_roc_auc(self):
    golds = np.array([0, 0, 0, 0, 1])
    probs = preds_to_probs(golds, 2)
    probs_nonbinary = np.array(
        [
            [1.0, 0.0, 0.0],
            [0.7, 0.0, 0.3],
            [0.8, 0.0, 0.2],
            [1.0, 0.0, 0.0],
            [0.0, 1.0, 0.0],
        ]
    )
    roc_auc = metric_score(golds, preds=None, probs=probs, metric="roc_auc")
    self.assertAlmostEqual(roc_auc, 1.0)
    probs = np.fliplr(probs)
    roc_auc = metric_score(golds, preds=None, probs=probs, metric="roc_auc")
    self.assertAlmostEqual(roc_auc, 0.0)
    with self.assertRaisesRegex(
        ValueError, "Metric roc_auc is currently only defined for binary"
    ):
        metric_score(golds, preds=None, probs=probs_nonbinary, metric="roc_auc")
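# For intuition: in the binary case, the "roc_auc" metric reduces to
# scikit-learn's roc_auc_score on the positive-class probability column.
# A minimal sketch, assuming probs has shape (n, 2) with class 1 second:
from sklearn.metrics import roc_auc_score

def binary_roc_auc_sketch(golds, probs):
    return roc_auc_score(golds, probs[:, 1])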
def train_model_from_probs(df_train_filtered, probs_train_filtered, df_valid, df_test):
    set_seeds()
    vectorizer = modeler.vectorizer
    X_train = vectorizer.fit_transform(df_train_filtered.text.tolist())
    X_valid = vectorizer.transform(df_valid["text"].tolist())
    X_test = vectorizer.transform(df_test["text"].tolist())
    Y_valid = df_valid["label"].values
    Y_test = df_test["label"].values

    # Define a vanilla logistic regression model with Keras
    keras_model = get_keras_logreg(input_dim=X_train.shape[1])
    keras_model.fit(
        x=X_train,
        y=probs_train_filtered,
        validation_data=(X_valid, preds_to_probs(Y_valid, 2)),
        callbacks=[get_keras_early_stopping()],
        epochs=50,
        verbose=0,
    )
    modeler.keras_model = keras_model

    preds_test = keras_model.predict(x=X_test).argmax(axis=1)
    stats = modeler.get_stats(modeler.Y_test, preds_test)
    update_stats({**stats, "data": "test"}, "train_model")
    return stats
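# For context, a plausible shape for the get_keras_logreg helper used above
# (a hypothetical sketch, assuming TF1-era Keras: a single sigmoid/softmax
# Dense layer compiled with the matching cross-entropy loss):
import tensorflow as tf

def get_keras_logreg_sketch(input_dim, output_dim=2):
    model = tf.keras.Sequential()
    if output_dim == 1:
        loss, activation = "binary_crossentropy", "sigmoid"
    else:
        loss, activation = "categorical_crossentropy", "softmax"
    model.add(
        tf.keras.layers.Dense(
            units=output_dim, input_dim=input_dim, activation=activation
        )
    )
    model.compile(
        optimizer=tf.keras.optimizers.Adam(0.01), loss=loss, metrics=["accuracy"]
    )
    return model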
def test_invalid_reduction(self):
    Y_golds = torch.LongTensor([0, 1, 2])
    Y_golds_probs = torch.Tensor(preds_to_probs(Y_golds.numpy(), num_classes=4))
    Y_probs = torch.rand_like(Y_golds_probs)
    Y_probs = Y_probs / Y_probs.sum(dim=1).reshape(-1, 1)
    with self.assertRaisesRegex(ValueError, "Keyword 'reduction' must be"):
        cross_entropy_with_probs(Y_probs, Y_golds_probs, reduction="bad")
def test_perfect_predictions(self):
    # Does soft ce loss achieve approx. 0 loss with perfect predictions?
    Y_golds = torch.LongTensor([0, 1, 2])
    Y_golds_probs = torch.Tensor(preds_to_probs(Y_golds.numpy(), num_classes=4))
    Y_probs = Y_golds_probs.clone()
    Y_probs[Y_probs == 1] = 100
    Y_probs[Y_probs == 0] = -100
    ces_loss = cross_entropy_with_probs(Y_probs, Y_golds_probs)
    np.testing.assert_equal(ces_loss.numpy(), 0)
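# Why the loss is exactly 0 here: with logits of +/-100 the gold class wins by
# a margin of 200, so softmax gives it probability 1 - ~4e-87, which rounds to
# 1.0 in float32 and yields a log-loss of exactly 0.0. A standalone check:
import torch
import torch.nn.functional as F

logits = torch.tensor([[100.0, -100.0, -100.0, -100.0]])
assert F.cross_entropy(logits, torch.LongTensor([0])).item() == 0.0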
def test_score_slices(self):
    DATA = [5, 10, 19, 22, 25]

    @slicing_function()
    def sf(x):
        return x.num < 20

    # We expect 3/5 correct -> 0.6 accuracy overall
    golds = np.array([0, 1, 0, 1, 0])
    preds = np.array([0, 0, 0, 0, 0])
    probs = preds_to_probs(preds, 2)

    # In the slice, we expect the last 2 elements to be masked out,
    # leaving 2/3 correct -> 0.666 accuracy
    data = [SimpleNamespace(num=x) for x in DATA]
    S = SFApplier([sf]).apply(data)
    scorer = Scorer(metrics=["accuracy"])

    # Test normal score
    metrics = scorer.score(golds=golds, preds=preds, probs=probs)
    self.assertEqual(metrics["accuracy"], 0.6)

    # Test score_slices
    slice_metrics = scorer.score_slices(S=S, golds=golds, preds=preds, probs=probs)
    self.assertEqual(slice_metrics["overall"]["accuracy"], 0.6)
    self.assertEqual(slice_metrics["sf"]["accuracy"], 2.0 / 3.0)

    # Test as_dataframe=True
    metrics_df = scorer.score_slices(
        S=S, golds=golds, preds=preds, probs=probs, as_dataframe=True
    )
    self.assertTrue(isinstance(metrics_df, pd.DataFrame))
    self.assertEqual(metrics_df["accuracy"]["overall"], 0.6)
    self.assertEqual(metrics_df["accuracy"]["sf"], 2.0 / 3.0)

    # Test wrong shapes
    with self.assertRaisesRegex(ValueError, "must have the same number of elements"):
        scorer.score_slices(
            S=S, golds=golds[:1], preds=preds, probs=probs, as_dataframe=True
        )
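# Roughly what score_slices computes per slice (a sketch, not Snorkel's exact
# code): mask golds/preds by the slice's 0/1 indicator field in the structured
# array S, then score only the masked subset.
def slice_accuracy_sketch(S, slice_name, golds, preds):
    mask = S[slice_name].astype(bool)  # in the test above: [T, T, T, F, F]
    return (golds[mask] == preds[mask]).mean()  # in the test above: 2/3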
def test_loss_weights(self):
    # Do class weights work as expected?
    FACTOR = 10
    Y_golds = torch.LongTensor([0, 0, 1])
    Y_golds_probs = torch.Tensor(preds_to_probs(Y_golds.numpy(), num_classes=3))
    # Predict [1, 1, 1]
    Y_probs = torch.tensor(
        [
            [-100.0, 100.0, -100.0],
            [-100.0, 100.0, -100.0],
            [-100.0, 100.0, -100.0],
        ]
    )

    ces_loss0 = cross_entropy_with_probs(Y_probs, Y_golds_probs).numpy()

    weight1 = torch.FloatTensor([1, 1, 1])
    ces_loss1 = cross_entropy_with_probs(Y_probs, Y_golds_probs, weight=weight1).numpy()
    # Do weights of 1 match no weights at all?
    self.assertEqual(ces_loss0, ces_loss1)

    weight2 = torch.FloatTensor([1, 2, 1])
    ces_loss2 = cross_entropy_with_probs(Y_probs, Y_golds_probs, weight=weight2).numpy()

    weight3 = weight2 * FACTOR
    ces_loss3 = cross_entropy_with_probs(Y_probs, Y_golds_probs, weight=weight3).numpy()
    # If weights are X times larger, is loss X times larger?
    self.assertAlmostEqual(ces_loss2 * FACTOR, ces_loss3, places=3)

    # Note that PyTorch's cross-entropy loss has the unusual behavior that weights
    # behave differently when losses are averaged inside vs. outside the function.
    # See https://github.com/pytorch/pytorch/issues/8062 for details.
    ce_loss3 = (
        F.cross_entropy(Y_probs, Y_golds, weight=weight3, reduction="none")
        .mean()
        .numpy()
    )
    # Do hard and soft ce loss still match when we use class weights?
    self.assertAlmostEqual(ce_loss3, ces_loss3, places=3)
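# The pytorch/pytorch#8062 caveat in miniature: with class weights,
# reduction="mean" divides by the sum of the per-example weights, whereas
# averaging a reduction="none" result divides by N. Self-contained demo:
import torch
import torch.nn.functional as F

logits = torch.tensor([[2.0, 0.0], [0.0, 2.0]])
targets = torch.LongTensor([0, 1])
w = torch.FloatTensor([1.0, 3.0])
inside = F.cross_entropy(logits, targets, weight=w, reduction="mean")
outside = F.cross_entropy(logits, targets, weight=w, reduction="none").mean()
# inside  = (1*l0 + 3*l1) / (1 + 3)   -- divided by the weight sum
# outside = (1*l0 + 3*l1) / 2         -- divided by the number of examples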
def train(self):
    probs_train = self.label_model.predict_proba(L=self.L_train)
    df_train_filtered, probs_train_filtered = filter_unlabeled_dataframe(
        X=self.df_train, y=probs_train, L=self.L_train
    )
    if len(df_train_filtered) == 0:
        print(
            "Labeling functions cover none of the training examples!",
            file=sys.stderr,
        )
        return {"micro_f1": 0}

    # from tensorflow.keras.utils import to_categorical
    # df_train_filtered, probs_train_filtered = (
    #     self.df_dev, to_categorical(self.df_dev["label"].values)
    # )

    vectorizer = self.vectorizer
    X_train = vectorizer.transform(df_train_filtered.text.tolist())
    X_dev = vectorizer.transform(self.df_dev.text.tolist())
    X_valid = vectorizer.transform(self.df_valid.text.tolist())
    X_test = vectorizer.transform(self.df_test.text.tolist())

    self.keras_model = get_keras_logreg(input_dim=X_train.shape[1])
    self.keras_model.fit(
        x=X_train,
        y=probs_train_filtered,
        validation_data=(X_valid, preds_to_probs(self.Y_valid, 2)),
        callbacks=[get_keras_early_stopping()],
        epochs=20,
        verbose=0,
    )
    preds_test = self.keras_model.predict(x=X_test).argmax(axis=1)
    # return preds_test
    return self.get_stats(self.Y_test, preds_test)
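# For reference, filter_unlabeled_dataframe keeps only rows that at least one
# labeling function labeled (i.e., where L is not all abstains). A minimal
# sketch of that behavior, assuming -1 marks an abstain:
def filter_unlabeled_sketch(X, y, L):
    mask = (L != -1).any(axis=1)
    return X[mask], y[mask]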
def test_preds_to_probs(self):
    np.testing.assert_array_equal(preds_to_probs(PREDS, 2), PREDS_ROUND)
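# A quick illustration of preds_to_probs: one-hot encode hard predictions
# into an (n, num_classes) probability matrix.
np.testing.assert_array_equal(
    preds_to_probs(np.array([0, 1, 0]), 2),
    np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 0.0]]),
)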
tf.set_random_seed(seed)
sess = tf.compat.v1.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)

# %% {"tags": ["md-exclude-output"]}
from snorkel.analysis import metric_score
from snorkel.utils import preds_to_probs
from utils import get_keras_logreg, get_keras_early_stopping

# Define a vanilla logistic regression model with Keras
keras_model = get_keras_logreg(input_dim=X_train.shape[1])

keras_model.fit(
    x=X_train,
    y=probs_train_filtered,
    validation_data=(X_valid, preds_to_probs(Y_valid, 2)),
    callbacks=[get_keras_early_stopping()],
    epochs=50,
    verbose=0,
)

# %%
preds_test = keras_model.predict(x=X_test).argmax(axis=1)
test_acc = metric_score(golds=Y_test, preds=preds_test, metric="accuracy")
print(f"Test Accuracy: {test_acc * 100:.1f}%")

# %% [markdown]
# **We observe an additional boost in accuracy over the `LabelModel` by multiple points!
# By using the label model to transfer the domain knowledge encoded in our LFs to the discriminative model,
# we were able to generalize beyond the noisy labeling heuristics**.
X_test, Y_test = df_to_features(vectorizer, df_test, "test")

# %% [markdown]
# We define a `LogisticRegression` model from `sklearn`.

# %%
from sklearn.linear_model import LogisticRegression

sklearn_model = LogisticRegression(C=0.001, solver="liblinear")
sklearn_model.fit(X=X_train, y=Y_train)

# %%
from snorkel.utils import preds_to_probs

preds_test = sklearn_model.predict(X_test)
probs_test = preds_to_probs(preds_test, 2)

# %%
from sklearn.metrics import f1_score

print(f"Test set F1: {100 * f1_score(Y_test, preds_test):.1f}%")

# %% [markdown]
# ### Store slice metadata in `S`

# %% [markdown]
# We apply our list of `sfs` to the data using an SF applier.
# For our data format, we leverage the [`PandasSFApplier`](https://snorkel.readthedocs.io/en/master/packages/_autosummary/slicing/snorkel.slicing.PandasSFApplier.html#snorkel.slicing.PandasSFApplier).
# The output of the `applier` is an [`np.recarray`](https://docs.scipy.org/doc/numpy/reference/generated/numpy.recarray.html) which stores vectors in named fields indicating whether each of $n$ data points belongs to the corresponding slice.

# %% {"tags": ["md-exclude-output"]}
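# A sketch of what this cell would contain (assuming `sfs` is the list of
# slicing functions defined earlier): apply the SFs and inspect one named
# field of the resulting recarray. The field name "short_comment" is a
# hypothetical example.
applier = PandasSFApplier(sfs)
S_test = applier.apply(df_test)
# Each named field is a 0/1 indicator vector, e.g. S_test["short_comment"]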
def slicing_evaluation(df_train, df_test, train_model=None):
    if train_model is None:
        train_model = "mlp"

    sfs = [
        SlicingFunction.short_comment,
        SlicingFunction.ind_keyword,
        SlicingFunction.cmp_re,
        SlicingFunction.industry_keyword,
    ]
    slice_names = [sf.name for sf in sfs]
    scorer = Scorer(metrics=["f1"])

    ft = FT.load(f"{WORK_PATH}/snorkel_flow/sources/fasttext_name_model.bin")

    def get_ftr(text):
        return ft.get_sentence_vector(' '.join([w for w in jieba.lcut(text.strip())]))

    X_train = np.array(list(df_train.text.apply(get_ftr).values))
    X_test = np.array(list(df_test.text.apply(get_ftr).values))
    Y_train = df_train.label.values
    Y_test = df_test.label.values

    if train_model == "lr":
        sklearn_model = LogisticRegression(C=0.001, solver="liblinear")
        sklearn_model.fit(X=X_train, y=Y_train)
        preds_test = sklearn_model.predict(X_test)
        probs_test = preds_to_probs(
            preds_test,
            len([c for c in dir(Polarity) if not c.startswith("__")]),
        )
        print(f"Test set F1: {100 * f1_score(Y_test, preds_test):.1f}%")
        applier = PandasSFApplier(sfs)
        S_test = applier.apply(df_test)
        analysis = scorer.score_slices(
            S=S_test,
            golds=Y_test,
            preds=preds_test,
            probs=probs_test,
            as_dataframe=True,
        )
        return analysis

    if train_model == "mlp":
        # Define model architecture
        bow_dim = X_train.shape[1]
        hidden_dim = bow_dim
        mlp = get_pytorch_mlp(hidden_dim=hidden_dim, num_layers=2)

        # Initialize slice model
        slice_model = SliceAwareClassifier(
            base_architecture=mlp,
            head_dim=hidden_dim,
            slice_names=slice_names,
            scorer=scorer,
        )

        # Generate the remaining S matrices with the new set of slicing functions
        applier = PandasSFApplier(sfs)
        S_train = applier.apply(df_train)
        S_test = applier.apply(df_test)

        # Add slice labels to an existing dataloader
        BATCH_SIZE = 64
        train_dl = create_dict_dataloader(X_train, Y_train, "train")
        train_dl_slice = slice_model.make_slice_dataloader(
            train_dl.dataset, S_train, shuffle=True, batch_size=BATCH_SIZE
        )
        test_dl = create_dict_dataloader(X_test, Y_test, "train")
        test_dl_slice = slice_model.make_slice_dataloader(
            test_dl.dataset, S_test, shuffle=False, batch_size=BATCH_SIZE
        )

        # Fit our classifier with the training set dataloader
        trainer = Trainer(n_epochs=2, lr=1e-4, progress_bar=True)
        trainer.fit(slice_model, [train_dl_slice])
        analysis = slice_model.score_slices([test_dl_slice], as_dataframe=True)
        return analysis
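# Hypothetical usage (assumes df_train/df_test are DataFrames with `text` and
# `label` columns whose labels match the Polarity classes):
# analysis = slicing_evaluation(df_train, df_test, train_model="lr")
# print(analysis)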
from tensorflow.keras import backend as K

tf.set_random_seed(seed)
sess = tf.compat.v1.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)

from snorkel.analysis import metric_score
from snorkel.utils import preds_to_probs
from utils import get_keras_logreg, get_keras_early_stopping

keras_model = get_keras_logreg(input_dim=X_train.shape[1])
keras_model.fit(
    x=X_train,
    y=probs_train_filtered,
    validation_data=(X_valid, preds_to_probs(Y_valid, 2)),
    callbacks=[get_keras_early_stopping()],
    epochs=20,
    verbose=0,
)
preds_test = keras_model.predict(x=X_test).argmax(axis=1)
test_acc = metric_score(golds=Y_test, preds=preds_test, metric="accuracy")
print(f"Test Accuracy: {test_acc * 100:.1f}%")

keras_dev_model = get_keras_logreg(input_dim=X_train.shape[1], output_dim=1)
keras_dev_model.fit(
    x=X_dev,
    y=Y_dev,
    validation_data=(X_valid, Y_valid),