def test_mv_default(self):
    # fewer than 2 LFs have overlaps
    label_model = LabelModel(cardinality=2, verbose=False)
    L = np.array([[-1, -1, 1], [-1, 1, -1], [0, -1, -1]])
    label_model.fit(L, n_epochs=100)
    np.testing.assert_array_almost_equal(label_model.predict(L), np.array([1, 1, 0]))

    # fewer than 2 LFs have conflicts
    L = np.array([[-1, -1, 1], [-1, 1, 1], [1, 1, 1]])
    label_model.fit(L, n_epochs=100)
    np.testing.assert_array_almost_equal(label_model.predict(L), np.array([1, 1, 1]))
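# Cross-check sketch (not part of the test suite): in these low-signal cases
# the LabelModel should agree with a plain majority vote. Assumes the
# snorkel>=0.9 import path for MajorityLabelVoter.
#
#   from snorkel.labeling.model import MajorityLabelVoter
#   mv = MajorityLabelVoter(cardinality=2)
#   mv.predict(np.array([[-1, -1, 1], [-1, 1, -1], [0, -1, -1]]))  # -> [1, 1, 0]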
def test_label_model_sparse(self) -> None:
    """Test the LabelModel's estimate of P and Y on a sparse synthetic dataset.

    This tests the common setting where LFs abstain most of the time, which
    can cause issues, for example, if parameter clamping is set too high
    (see Issue #1422).
    """
    np.random.seed(123)
    P, Y, L = generate_simple_label_matrix(
        self.n, self.m, self.cardinality, abstain_multiplier=1000.0
    )

    # Train LabelModel
    label_model = LabelModel(cardinality=self.cardinality, verbose=False)
    label_model.fit(L, n_epochs=1000, lr=0.01, seed=123)

    # Test estimated LF conditional probabilities
    P_lm = label_model.get_conditional_probs()
    np.testing.assert_array_almost_equal(P, P_lm, decimal=2)

    # Test predicted labels *only on non-abstained data points*
    Y_pred = label_model.predict(L, tie_break_policy="abstain")
    (idx,) = np.where(Y_pred != -1)
    acc = np.where(Y_pred[idx] == Y[idx], 1, 0).sum() / len(idx)
    self.assertGreaterEqual(acc, 0.65)

    # Make sure that we don't output abstain when an LF votes, per Issue #1422.
    # (L + 1).sum(axis=1) != 0 marks rows where at least one LF did not abstain,
    # since adding 1 maps abstains (-1) to 0.
    self.assertEqual(len(idx), np.where((L + 1).sum(axis=1) != 0, 1, 0).sum())
def test_save_and_load(self):
    L = np.array([[0, -1, 0], [0, 1, 1]])
    label_model = LabelModel(cardinality=2, verbose=False)
    label_model.fit(L, n_epochs=1)
    original_preds = label_model.predict(L)

    dir_path = tempfile.mkdtemp()
    # Join inside the temp dir (requires `import os`); the original
    # dir_path + "label_model.pkl" concatenation would write the file *next to*
    # the directory, so shutil.rmtree below would not clean it up.
    save_path = os.path.join(dir_path, "label_model.pkl")
    label_model.save(save_path)

    label_model_new = LabelModel(cardinality=2, verbose=False)
    label_model_new.load(save_path)
    loaded_preds = label_model_new.predict(L)

    shutil.rmtree(dir_path)
    np.testing.assert_array_equal(loaded_preds, original_preds)
def snorkel_process(keylist, dataframe, allweaklabf):
    def func(x):
        # Zero out everything except the top-scoring class in a probability row
        idx = (-x).argsort()[1:]
        x[idx] = 0
        return x

    cardinalitynu = len(keylist)

    # Apply all weak labeling functions and summarize their behavior
    applier = PandasLFApplier(lfs=allweaklabf)
    all_train_l = applier.apply(df=dataframe)
    report = LFAnalysis(L=all_train_l, lfs=allweaklabf).lf_summary()
    print(report)

    # Fit the generative label model; get hard labels and class probabilities
    label_model = LabelModel(cardinality=cardinalitynu, verbose=False)
    label_model.fit(all_train_l)
    predt = label_model.predict(all_train_l)
    predt1 = label_model.predict_proba(all_train_l)

    keylist1 = keylist.copy()
    # keylist1.append('Not_relevent')
    predt2 = pd.DataFrame(predt1, columns=keylist1)
    dataframe['L_label'] = predt
    dataframe1 = dataframe.join(predt2, how='outer')

    # Drop abstained rows (-1) and split into train/test sets
    dataframe1 = dataframe1[dataframe1.L_label >= 0]
    train, test = train_test_split(dataframe1, test_size=0.2)

    trainsent = train.sent.values
    trainlabel = train[keylist].values
    trainlabe2 = trainlabel.copy()
    # Assign the result: relying on apply_along_axis mutating rows in place
    # is fragile, since it may pass copies of each row to func
    trainlabe2 = np.apply_along_axis(func, 1, trainlabe2)
    trainlabe2 = np.where(trainlabe2 > 0, 1, 0)

    testsent = test.sent.values
    testlabel = test[keylist].values
    testlabe2 = testlabel.copy()
    testlabe2 = np.apply_along_axis(func, 1, testlabe2)
    testlabe2 = np.where(testlabe2 > 0, 1, 0)

    return trainsent, trainlabe2, testsent, testlabe2, keylist, report
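# Illustration (not part of the pipeline): what `func` does to one row of
# hypothetical class probabilities, and how np.where then one-hot encodes it.
#
#   row = np.array([0.2, 0.7, 0.1])
#   idx = (-row).argsort()[1:]    # indices of everything except the max
#   row[idx] = 0                  # -> array([0. , 0.7, 0. ])
#   np.where(row > 0, 1, 0)       # -> array([0, 1, 0])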
def test_score(self):
    L = np.array([[1, 1, 0], [-1, -1, -1], [1, 0, 1]])
    Y = np.array([1, 0, 1])
    label_model = LabelModel(cardinality=2, verbose=False)
    label_model.fit(L, n_epochs=100)
    results = label_model.score(L, Y, metrics=["accuracy", "coverage"])
    np.testing.assert_array_almost_equal(label_model.predict(L), np.array([1, -1, 1]))
    # The second row is all abstains, so coverage is 2/3
    results_expected = dict(accuracy=1.0, coverage=2 / 3)
    self.assertEqual(results, results_expected)

    L = np.array([[1, 0, 1], [1, 0, 1]])
    label_model = self._set_up_model(L)
    label_model.mu = nn.Parameter(label_model.mu_init.clone().clamp(0.01, 0.99))

    results = label_model.score(L, Y=np.array([0, 1]))
    results_expected = dict(accuracy=0.5)
    self.assertEqual(results, results_expected)

    results = label_model.score(L=L, Y=np.array([1, 0]), metrics=["accuracy", "f1"])
    results_expected = dict(accuracy=0.5, f1=2 / 3)
    self.assertEqual(results, results_expected)
def generate_labels_with_snorkel(dataframe):
    """
    Labels the full data using Snorkel

    :param dataframe: Pandas dataframe containing all data
    :return: dataframe extended with a label column
    """
    # Define the set of labeling functions (LFs)
    lfs = [
        lf_ubo_is_company,
        lf_troika_company,
        lf_uk_blacklisted_company,
        lf_non_uk_blacklisted_company,
    ]

    # Apply the LFs to the unlabeled training data
    applier = PandasLFApplier(lfs)
    L_train = applier.apply(dataframe)

    # Train the label model and compute the training labels
    label_model = LabelModel(cardinality=2, verbose=True)
    label_model.fit(L_train, n_epochs=500, log_freq=50, seed=123)
    dataframe["label"] = label_model.predict(L=L_train, tie_break_policy="abstain")

    # Filter out the abstain data points
    dataframe = dataframe[dataframe.label != ABSTAIN]
    return dataframe
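# For reference, each LF listed above is a per-row rule wrapped in Snorkel's
# @labeling_function decorator. A hypothetical, self-contained sketch only:
# the real lf_ubo_is_company logic lives elsewhere, and the `company_type`
# field is made up for illustration.
from snorkel.labeling import labeling_function

IS_COMPANY, ABSTAIN_SKETCH = 1, -1  # mirrors Snorkel's -1 = abstain convention


@labeling_function()
def lf_example_is_company(x):
    # Vote IS_COMPANY when the (hypothetical) field matches; otherwise abstain
    return IS_COMPANY if getattr(x, "company_type", None) == "ltd" else ABSTAIN_SKETCH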
def test_predict(self):
    # 3 LFs that always disagree/abstain lead to all abstains
    L = np.array([[-1, 1, 0], [0, -1, 1], [1, 0, -1]])
    label_model = LabelModel(cardinality=2, verbose=False)
    label_model.fit(L, n_epochs=100)
    np.testing.assert_array_almost_equal(label_model.predict(L), np.array([-1, -1, -1]))

    L = np.array([[0, 1, 0], [0, 1, 0]])
    label_model = self._set_up_model(L)
    label_model.mu = nn.Parameter(label_model.mu_init.clone().clamp(0.01, 0.99))
    preds = label_model.predict(L)
    true_preds = np.array([0, 0])
    np.testing.assert_array_equal(preds, true_preds)

    preds, probs = label_model.predict(L, return_probs=True)
    true_probs = np.array([[0.99, 0.01], [0.99, 0.01]])
    np.testing.assert_array_almost_equal(probs, true_probs)
def curate_twitter(save_name='../../pandafied_data/curated_twitter.csv'):
    df_train = pd.read_csv('../../pandafied_data/pandafied_twitter.csv')

    # Define the set of labeling functions (LFs). Alternatives tried:
    # lfs = [lf_keyword_wateroverlast, lf_keyword_voertuig, lf_keyword_aanrijding,
    #        lf_keyword_te_water, lf_keyword_persoon, lf_keyword_brand,
    #        lf_keyword_mps, lf_keyword_kps, lf_keyword_luchtdr]
    # lfs = [lf_keyword_keywords]
    lfs = [lf_keyword_wateroverlast]

    # Apply the LFs to the unlabeled training data
    applier = PandasLFApplier(lfs)
    L_train = applier.apply(df_train)

    # Train the label model and compute the training labels;
    # tie_break_policy options include "abstain" and "true-random"
    label_model = LabelModel(cardinality=2, verbose=True)
    label_model.fit(L_train, n_epochs=500, log_freq=50, seed=123)
    df_train["label"] = label_model.predict(L=L_train, tie_break_policy="abstain")

    # Print and count the tweets labeled WATER
    counter = 0
    for i in range(len(df_train["label"])):
        if df_train["label"][i] == WATER:
            print()
            print(df_train["text"][i])
            print(df_train["label"][i])
            print()
            counter += 1
    print("num entries total: " + str(len(df_train["label"])))
    print("num entries water: " + str(counter))

    # Keep only WATER-labeled tweets and write them out
    # (an alternative would be df_train = df_train[df_train.label != ABSTAIN])
    twitter_curated = df_train[df_train.label == WATER]
    twitter_curated = twitter_curated.drop(columns='label')
    twitter_curated.to_csv(save_name, index=False)
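# Aside (illustrative): the counting loop above can be replaced by an
# equivalent vectorized form with the same semantics:
#
#   counter = int((df_train["label"] == WATER).sum())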
# Indices of test points covered by at least one rule, and those covered by none
test_fired_idx = [i for i, item in enumerate(test_m) if sum(item) != 0]
test_unfired_idx = [i for i, item in enumerate(test_m) if sum(item) == 0]
targets_test = test_L[test_fired_idx]

# Majority voting using Snorkel's majority voting model
maj_preds_test = majority_model.predict(L=test_lsnork[test_fired_idx])
maj_precision_test, maj_recall_test, maj_f1_score_test, maj_support_test = \
    precision_recall_fscore_support(targets_test, maj_preds_test)
maj_accuracy_test = compute_accuracy(maj_support_test, maj_recall_test)
print("precision on *** RULE COVERED TEST SET *** of MAJORITY VOTING: {}".format(maj_precision_test))
print("recall on *** RULE COVERED TEST SET *** of MAJORITY VOTING: {}".format(maj_recall_test))
print("f1_score on *** RULE COVERED TEST SET *** of MAJORITY VOTING: {}".format(maj_f1_score_test))
print("support on *** RULE COVERED TEST SET *** of MAJORITY VOTING: {}".format(maj_support_test))
print("accuracy on *** RULE COVERED TEST SET *** of MAJORITY VOTING: {}".format(maj_accuracy_test))

# Now train Snorkel's label model
print("Training Snorkel's LabelModel")
label_model = LabelModel(cardinality=num_classes, verbose=True)
label_model.fit(L_train=U_lsnork, n_epochs=1000, lr=0.001, log_freq=100, seed=123)
label_model.save(os.path.join(path_dir, "saved_label_model"))

snork_preds_test = label_model.predict(L=test_lsnork[test_fired_idx])
snork_precision_test, snork_recall_test, snork_f1_score_test, snork_support_test = \
    precision_recall_fscore_support(targets_test, snork_preds_test)
snork_accuracy_test = compute_accuracy(snork_support_test, snork_recall_test)
print("precision on *** RULE COVERED TEST SET *** of SNORKEL VOTING: {}".format(snork_precision_test))
print("recall on *** RULE COVERED TEST SET *** of SNORKEL VOTING: {}".format(snork_recall_test))
print("f1_score on *** RULE COVERED TEST SET *** of SNORKEL VOTING: {}".format(snork_f1_score_test))
print("support on *** RULE COVERED TEST SET *** of SNORKEL VOTING: {}".format(snork_support_test))
print("accuracy on *** RULE COVERED TEST SET *** of SNORKEL VOTING: {}".format(snork_accuracy_test))
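# `compute_accuracy` is defined elsewhere in this repo. A plausible sketch,
# consistent with how it is called here: the support-weighted mean of
# per-class recall equals overall accuracy, because
# sum_c(support_c * recall_c) = sum_c(TP_c) and sum_c(support_c) = N.
import numpy as np

def compute_accuracy_sketch(support, recall):
    support = np.asarray(support, dtype=float)
    recall = np.asarray(recall, dtype=float)
    return float((support * recall).sum() / support.sum())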
    lf.lf_spacy_words_sexism,
    lf.lf_keyword_raicism,
    lf.lf_spacy_words_gpe,
    lf.lf_keyword_shaming,
    lf.lf_spacy_threat,
    lf.lf_spacy_terrorism,
    lf.lf_neg_nonehumansubject,
]
# Unused LFs (gave false positives):
# lf.lf_spacy_animals, lf.lf_spacy_politics

# Apply the LFs to the unlabeled training data
applier = PandasLFApplier(lfs)
L_train = applier.apply(df_train)

# Train the label model and compute the training labels.
# Cardinality was 2; got: ValueError: L_train has cardinality 3, cardinality=2 passed in.
label_model = LabelModel(cardinality=3, verbose=True)
label_model.fit(L_train, n_epochs=500, log_freq=50, seed=123)
df_train["label"] = label_model.predict(L=L_train, tie_break_policy="abstain")

# Output the labelled dataset
df_train.to_csv('labelledDataset.csv', index=None, header=True)

# Filter out abstained data points
df_train = df_train[df_train.label != ABSTAIN]
print("Useful data remaining: " + str(df_train.shape[0]))

# Ignoring transformation functions for data augmentation for now...
# TODO: create transformation functions for different categories of hate speech
# Ignoring slicing; don't think we need it

# Training a classifier
docs = df_train.iloc[:, 0].tolist()  # first column of the data frame (first_name)
# %% [markdown]
# ## Train LabelModel And Generate Probabilistic Labels

# %% {"tags": ["md-exclude-output"]}
from snorkel.labeling import LabelModel

# Train LabelModel.
label_model = LabelModel(cardinality=2, verbose=True)
label_model.fit(L_train, n_epochs=100, seed=123, log_freq=20, l2=0.1, lr=0.01)

# %% [markdown]
# As a spot-check for the quality of our LabelModel, we'll score it on the dev set.

# %%
from snorkel.analysis import metric_score

preds_dev = label_model.predict(L_dev)
acc = metric_score(Y_dev, preds_dev, probs=None, metric="accuracy")
print(f"LabelModel Accuracy: {acc:.3f}")

# %% [markdown]
# We see that we get very high accuracy on the development set.
# This is due to the abundance of high-quality crowdworker labels.
# **Since we don't have these high-quality crowdsourcing labels for the
# test set or new incoming data points, we can't use the LabelModel reliably
# at inference time.**
# In order to run inference on new incoming data points, we need to train a
# discriminative model over the tweets themselves.
# Let's generate a set of labels for that training set.

# %%
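# A plausible next cell, sketched under assumptions: filter_unlabeled_dataframe
# is real Snorkel API, but `df_train` and the result names below are guesses.
# Build probabilistic labels and drop data points that no LF covered.
from snorkel.labeling import filter_unlabeled_dataframe

probs_train = label_model.predict_proba(L_train)
df_train_filtered, probs_train_filtered = filter_unlabeled_dataframe(
    X=df_train, y=probs_train, L=L_train
)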
# %%
label_model.score(L_valid, Y_valid, metrics=["f1_micro"])

# %% [markdown]
# ## 4. Train a Classifier
# You can then use these training labels to train any standard discriminative
# model, such as [an off-the-shelf ResNet](https://github.com/KaimingHe/deep-residual-networks),
# which should learn to generalize beyond the LFs we've developed!

# %% [markdown]
# #### Create DataLoaders for Classifier

# %%
from snorkel.classification import DictDataLoader
from model import SceneGraphDataset, create_model

df_train["labels"] = label_model.predict(L_train)

if sample:
    TRAIN_DIR = "data/VRD/sg_dataset/samples"
else:
    TRAIN_DIR = "data/VRD/sg_dataset/sg_train_images"

dl_train = DictDataLoader(
    SceneGraphDataset("train_dataset", "train", TRAIN_DIR, df_train),
    batch_size=16,
    shuffle=True,
)

dl_valid = DictDataLoader(
    SceneGraphDataset("valid_dataset", "valid", TRAIN_DIR, df_valid),
    batch_size=16,
LFAnalysis(L=L_train, lfs=lfs).lf_summary()

# In[12]:

print(f"Training set coverage: {100 * LFAnalysis(L_train).label_coverage():0.1f}%")

# In[15]:

from snorkel.labeling import LabelModel

# Train LabelModel.
label_model = LabelModel(cardinality=2, verbose=True)
label_model.fit(L_train, n_epochs=150, seed=125, log_freq=30, l2=0.1, lr=0.01)

# In[16]:

label = label_model.predict(L_train)

# In[18]:

len(label)

# In[ ]:

with open('flabel.pkl', 'wb') as f:
    pickle.dump(label, f)
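# In[ ]:

# Round-trip sanity check (illustrative addition, not in the original script):
with open('flabel.pkl', 'rb') as f:
    label_loaded = pickle.load(f)
assert (label_loaded == label).all()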