def test_mv_default(self):
    # fewer than 2 LFs have overlaps
    label_model = LabelModel(cardinality=2, verbose=False)
    L = np.array([[-1, -1, 1], [-1, 1, -1], [0, -1, -1]])
    label_model.fit(L, n_epochs=100)
    np.testing.assert_array_almost_equal(label_model.predict(L), np.array([1, 1, 0]))

    # fewer than 2 LFs have conflicts
    L = np.array([[-1, -1, 1], [-1, 1, 1], [1, 1, 1]])
    label_model.fit(L, n_epochs=100)
    np.testing.assert_array_almost_equal(label_model.predict(L), np.array([1, 1, 1]))
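# Cross-check sketch (not part of the test suite): in these low-signal cases
# the LabelModel should agree with a plain majority vote. Assumes the
# snorkel>=0.9 import path for MajorityLabelVoter.
#
#   from snorkel.labeling.model import MajorityLabelVoter
#   mv = MajorityLabelVoter(cardinality=2)
#   mv.predict(np.array([[-1, -1, 1], [-1, 1, -1], [0, -1, -1]]))  # -> [1, 1, 0]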
def test_label_model_sparse(self) -> None:
    """Test the LabelModel's estimate of P and Y on a sparse synthetic dataset.

    This tests the common setting where LFs abstain most of the time, which
    can cause issues, for example, if parameter clamping is set too high
    (see Issue #1422).
    """
    np.random.seed(123)
    P, Y, L = generate_simple_label_matrix(
        self.n, self.m, self.cardinality, abstain_multiplier=1000.0
    )

    # Train LabelModel
    label_model = LabelModel(cardinality=self.cardinality, verbose=False)
    label_model.fit(L, n_epochs=1000, lr=0.01, seed=123)

    # Test estimated LF conditional probabilities
    P_lm = label_model.get_conditional_probs()
    np.testing.assert_array_almost_equal(P, P_lm, decimal=2)

    # Test predicted labels *only on non-abstained data points*
    Y_pred = label_model.predict(L, tie_break_policy="abstain")
    (idx,) = np.where(Y_pred != -1)
    acc = np.where(Y_pred[idx] == Y[idx], 1, 0).sum() / len(idx)
    self.assertGreaterEqual(acc, 0.65)

    # Make sure that we don't output abstain when an LF votes, per Issue #1422.
    # (L + 1).sum(axis=1) != 0 marks rows where at least one LF did not abstain,
    # since adding 1 maps abstains (-1) to 0.
    self.assertEqual(len(idx), np.where((L + 1).sum(axis=1) != 0, 1, 0).sum())
def test_save_and_load(self):
    L = np.array([[0, -1, 0], [0, 1, 1]])
    label_model = LabelModel(cardinality=2, verbose=False)
    label_model.fit(L, n_epochs=1)
    original_preds = label_model.predict(L)

    dir_path = tempfile.mkdtemp()
    # Join inside the temp dir (requires `import os`); the original
    # dir_path + "label_model.pkl" concatenation would write the file *next to*
    # the directory, so shutil.rmtree below would not clean it up.
    save_path = os.path.join(dir_path, "label_model.pkl")
    label_model.save(save_path)

    label_model_new = LabelModel(cardinality=2, verbose=False)
    label_model_new.load(save_path)
    loaded_preds = label_model_new.predict(L)

    shutil.rmtree(dir_path)
    np.testing.assert_array_equal(loaded_preds, original_preds)
def snorkel_process(keylist, dataframe, allweaklabf):
    def func(x):
        # Zero out everything except the top-scoring class in a probability row
        idx = (-x).argsort()[1:]
        x[idx] = 0
        return x

    cardinalitynu = len(keylist)

    # Apply all weak labeling functions and summarize their behavior
    applier = PandasLFApplier(lfs=allweaklabf)
    all_train_l = applier.apply(df=dataframe)
    report = LFAnalysis(L=all_train_l, lfs=allweaklabf).lf_summary()
    print(report)

    # Fit the generative label model; get hard labels and class probabilities
    label_model = LabelModel(cardinality=cardinalitynu, verbose=False)
    label_model.fit(all_train_l)
    predt = label_model.predict(all_train_l)
    predt1 = label_model.predict_proba(all_train_l)

    keylist1 = keylist.copy()
    # keylist1.append('Not_relevent')
    predt2 = pd.DataFrame(predt1, columns=keylist1)
    dataframe['L_label'] = predt
    dataframe1 = dataframe.join(predt2, how='outer')

    # Drop abstained rows (-1) and split into train/test sets
    dataframe1 = dataframe1[dataframe1.L_label >= 0]
    train, test = train_test_split(dataframe1, test_size=0.2)

    trainsent = train.sent.values
    trainlabel = train[keylist].values
    trainlabe2 = trainlabel.copy()
    # Assign the result: relying on apply_along_axis mutating rows in place
    # is fragile, since it may pass copies of each row to func
    trainlabe2 = np.apply_along_axis(func, 1, trainlabe2)
    trainlabe2 = np.where(trainlabe2 > 0, 1, 0)

    testsent = test.sent.values
    testlabel = test[keylist].values
    testlabe2 = testlabel.copy()
    testlabe2 = np.apply_along_axis(func, 1, testlabe2)
    testlabe2 = np.where(testlabe2 > 0, 1, 0)

    return trainsent, trainlabe2, testsent, testlabe2, keylist, report
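# Illustration (not part of the pipeline): what `func` does to one row of
# hypothetical class probabilities, and how np.where then one-hot encodes it.
#
#   row = np.array([0.2, 0.7, 0.1])
#   idx = (-row).argsort()[1:]    # indices of everything except the max
#   row[idx] = 0                  # -> array([0. , 0.7, 0. ])
#   np.where(row > 0, 1, 0)       # -> array([0, 1, 0])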
def test_score(self):
    L = np.array([[1, 1, 0], [-1, -1, -1], [1, 0, 1]])
    Y = np.array([1, 0, 1])
    label_model = LabelModel(cardinality=2, verbose=False)
    label_model.fit(L, n_epochs=100)
    results = label_model.score(L, Y, metrics=["accuracy", "coverage"])
    np.testing.assert_array_almost_equal(label_model.predict(L), np.array([1, -1, 1]))
    # The second row is all abstains, so coverage is 2/3
    results_expected = dict(accuracy=1.0, coverage=2 / 3)
    self.assertEqual(results, results_expected)

    L = np.array([[1, 0, 1], [1, 0, 1]])
    label_model = self._set_up_model(L)
    label_model.mu = nn.Parameter(label_model.mu_init.clone().clamp(0.01, 0.99))

    results = label_model.score(L, Y=np.array([0, 1]))
    results_expected = dict(accuracy=0.5)
    self.assertEqual(results, results_expected)

    results = label_model.score(L=L, Y=np.array([1, 0]), metrics=["accuracy", "f1"])
    results_expected = dict(accuracy=0.5, f1=2 / 3)
    self.assertEqual(results, results_expected)
def generate_labels_with_snorkel(dataframe):
    """
    Labels the full data using Snorkel

    :param dataframe: Pandas dataframe containing all data
    :return: dataframe extended with a label column
    """
    # Define the set of labeling functions (LFs)
    lfs = [
        lf_ubo_is_company,
        lf_troika_company,
        lf_uk_blacklisted_company,
        lf_non_uk_blacklisted_company,
    ]

    # Apply the LFs to the unlabeled training data
    applier = PandasLFApplier(lfs)
    L_train = applier.apply(dataframe)

    # Train the label model and compute the training labels
    label_model = LabelModel(cardinality=2, verbose=True)
    label_model.fit(L_train, n_epochs=500, log_freq=50, seed=123)
    dataframe["label"] = label_model.predict(L=L_train, tie_break_policy="abstain")

    # Filter out the abstain data points
    dataframe = dataframe[dataframe.label != ABSTAIN]
    return dataframe
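# For reference, each LF listed above is a per-row rule wrapped in Snorkel's
# @labeling_function decorator. A hypothetical, self-contained sketch only:
# the real lf_ubo_is_company logic lives elsewhere, and the `company_type`
# field is made up for illustration.
from snorkel.labeling import labeling_function

IS_COMPANY, ABSTAIN_SKETCH = 1, -1  # mirrors Snorkel's -1 = abstain convention


@labeling_function()
def lf_example_is_company(x):
    # Vote IS_COMPANY when the (hypothetical) field matches; otherwise abstain
    return IS_COMPANY if getattr(x, "company_type", None) == "ltd" else ABSTAIN_SKETCH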
def test_predict(self):
    # 3 LFs that always disagree/abstain lead to all abstains
    L = np.array([[-1, 1, 0], [0, -1, 1], [1, 0, -1]])
    label_model = LabelModel(cardinality=2, verbose=False)
    label_model.fit(L, n_epochs=100)
    np.testing.assert_array_almost_equal(label_model.predict(L), np.array([-1, -1, -1]))

    L = np.array([[0, 1, 0], [0, 1, 0]])
    label_model = self._set_up_model(L)
    label_model.mu = nn.Parameter(label_model.mu_init.clone().clamp(0.01, 0.99))
    preds = label_model.predict(L)
    true_preds = np.array([0, 0])
    np.testing.assert_array_equal(preds, true_preds)

    preds, probs = label_model.predict(L, return_probs=True)
    true_probs = np.array([[0.99, 0.01], [0.99, 0.01]])
    np.testing.assert_array_almost_equal(probs, true_probs)
def curate_twitter(save_name='../../pandafied_data/curated_twitter.csv'):
    df_train = pd.read_csv('../../pandafied_data/pandafied_twitter.csv')

    # Define the set of labeling functions (LFs). Alternatives tried:
    # lfs = [lf_keyword_wateroverlast, lf_keyword_voertuig, lf_keyword_aanrijding,
    #        lf_keyword_te_water, lf_keyword_persoon, lf_keyword_brand,
    #        lf_keyword_mps, lf_keyword_kps, lf_keyword_luchtdr]
    # lfs = [lf_keyword_keywords]
    lfs = [lf_keyword_wateroverlast]

    # Apply the LFs to the unlabeled training data
    applier = PandasLFApplier(lfs)
    L_train = applier.apply(df_train)

    # Train the label model and compute the training labels;
    # tie_break_policy options include "abstain" and "true-random"
    label_model = LabelModel(cardinality=2, verbose=True)
    label_model.fit(L_train, n_epochs=500, log_freq=50, seed=123)
    df_train["label"] = label_model.predict(L=L_train, tie_break_policy="abstain")

    # Print and count the tweets labeled WATER
    counter = 0
    for i in range(len(df_train["label"])):
        if df_train["label"][i] == WATER:
            print()
            print(df_train["text"][i])
            print(df_train["label"][i])
            print()
            counter += 1
    print("num entries total: " + str(len(df_train["label"])))
    print("num entries water: " + str(counter))

    # Keep only WATER-labeled tweets and write them out
    # (an alternative would be df_train = df_train[df_train.label != ABSTAIN])
    twitter_curated = df_train[df_train.label == WATER]
    twitter_curated = twitter_curated.drop(columns='label')
    twitter_curated.to_csv(save_name, index=False)
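# Aside (illustrative): the counting loop above can be replaced by an
# equivalent vectorized form with the same semantics:
#
#   counter = int((df_train["label"] == WATER).sum())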
# Indices of test points covered by at least one rule, and those covered by none
test_fired_idx = [i for i, item in enumerate(test_m) if sum(item) != 0]
test_unfired_idx = [i for i, item in enumerate(test_m) if sum(item) == 0]
targets_test = test_L[test_fired_idx]

# Majority voting using Snorkel's majority voting model
maj_preds_test = majority_model.predict(L=test_lsnork[test_fired_idx])
maj_precision_test, maj_recall_test, maj_f1_score_test, maj_support_test = \
    precision_recall_fscore_support(targets_test, maj_preds_test)
maj_accuracy_test = compute_accuracy(maj_support_test, maj_recall_test)
print("precision on *** RULE COVERED TEST SET *** of MAJORITY VOTING: {}".format(maj_precision_test))
print("recall on *** RULE COVERED TEST SET *** of MAJORITY VOTING: {}".format(maj_recall_test))
print("f1_score on *** RULE COVERED TEST SET *** of MAJORITY VOTING: {}".format(maj_f1_score_test))
print("support on *** RULE COVERED TEST SET *** of MAJORITY VOTING: {}".format(maj_support_test))
print("accuracy on *** RULE COVERED TEST SET *** of MAJORITY VOTING: {}".format(maj_accuracy_test))

# Now train Snorkel's label model
print("Training Snorkel's LabelModel")
label_model = LabelModel(cardinality=num_classes, verbose=True)
label_model.fit(L_train=U_lsnork, n_epochs=1000, lr=0.001, log_freq=100, seed=123)
label_model.save(os.path.join(path_dir, "saved_label_model"))

snork_preds_test = label_model.predict(L=test_lsnork[test_fired_idx])
snork_precision_test, snork_recall_test, snork_f1_score_test, snork_support_test = \
    precision_recall_fscore_support(targets_test, snork_preds_test)
snork_accuracy_test = compute_accuracy(snork_support_test, snork_recall_test)
print("precision on *** RULE COVERED TEST SET *** of SNORKEL VOTING: {}".format(snork_precision_test))
print("recall on *** RULE COVERED TEST SET *** of SNORKEL VOTING: {}".format(snork_recall_test))
print("f1_score on *** RULE COVERED TEST SET *** of SNORKEL VOTING: {}".format(snork_f1_score_test))
print("support on *** RULE COVERED TEST SET *** of SNORKEL VOTING: {}".format(snork_support_test))
print("accuracy on *** RULE COVERED TEST SET *** of SNORKEL VOTING: {}".format(snork_accuracy_test))
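# `compute_accuracy` is defined elsewhere in this repo. A plausible sketch,
# consistent with how it is called here: the support-weighted mean of
# per-class recall equals overall accuracy, because
# sum_c(support_c * recall_c) = sum_c(TP_c) and sum_c(support_c) = N.
import numpy as np

def compute_accuracy_sketch(support, recall):
    support = np.asarray(support, dtype=float)
    recall = np.asarray(recall, dtype=float)
    return float((support * recall).sum() / support.sum())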
    lf.lf_spacy_words_sexism,
    lf.lf_keyword_raicism,
    lf.lf_spacy_words_gpe,
    lf.lf_keyword_shaming,
    lf.lf_spacy_threat,
    lf.lf_spacy_terrorism,
    lf.lf_neg_nonehumansubject,
]
# Unused LFs (gave false positives):
# lf.lf_spacy_animals, lf.lf_spacy_politics

# Apply the LFs to the unlabeled training data
applier = PandasLFApplier(lfs)
L_train = applier.apply(df_train)

# Train the label model and compute the training labels.
# Cardinality was 2; got: ValueError: L_train has cardinality 3, cardinality=2 passed in.
label_model = LabelModel(cardinality=3, verbose=True)
label_model.fit(L_train, n_epochs=500, log_freq=50, seed=123)
df_train["label"] = label_model.predict(L=L_train, tie_break_policy="abstain")

# Output the labelled dataset
df_train.to_csv('labelledDataset.csv', index=None, header=True)

# Filter out abstained data points
df_train = df_train[df_train.label != ABSTAIN]
print("Useful data remaining: " + str(df_train.shape[0]))

# Ignoring transformation functions for data augmentation for now...
# TODO: create transformation functions for different categories of hate speech
# Ignoring slicing; don't think we need it

# Training a classifier
docs = df_train.iloc[:, 0].tolist()  # first column of the data frame (first_name)
# %% [markdown]
# ## Train LabelModel And Generate Probabilistic Labels

# %% {"tags": ["md-exclude-output"]}
from snorkel.labeling import LabelModel

# Train LabelModel.
label_model = LabelModel(cardinality=2, verbose=True)
label_model.fit(L_train, n_epochs=100, seed=123, log_freq=20, l2=0.1, lr=0.01)

# %% [markdown]
# As a spot-check for the quality of our LabelModel, we'll score it on the dev set.

# %%
from snorkel.analysis import metric_score

preds_dev = label_model.predict(L_dev)
acc = metric_score(Y_dev, preds_dev, probs=None, metric="accuracy")
print(f"LabelModel Accuracy: {acc:.3f}")

# %% [markdown]
# We see that we get very high accuracy on the development set.
# This is due to the abundance of high-quality crowdworker labels.
# **Since we don't have these high-quality crowdsourcing labels for the
# test set or new incoming data points, we can't use the LabelModel reliably
# at inference time.**
# In order to run inference on new incoming data points, we need to train a
# discriminative model over the tweets themselves.
# Let's generate a set of labels for that training set.

# %%
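# A plausible next cell, sketched under assumptions: filter_unlabeled_dataframe
# is real Snorkel API, but `df_train` and the result names below are guesses.
# Build probabilistic labels and drop data points that no LF covered.
from snorkel.labeling import filter_unlabeled_dataframe

probs_train = label_model.predict_proba(L_train)
df_train_filtered, probs_train_filtered = filter_unlabeled_dataframe(
    X=df_train, y=probs_train, L=L_train
)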
# %%
label_model.score(L_valid, Y_valid, metrics=["f1_micro"])

# %% [markdown]
# ## 4. Train a Classifier
# You can then use these training labels to train any standard discriminative
# model, such as [an off-the-shelf ResNet](https://github.com/KaimingHe/deep-residual-networks),
# which should learn to generalize beyond the LFs we've developed!

# %% [markdown]
# #### Create DataLoaders for Classifier

# %%
from snorkel.classification import DictDataLoader
from model import SceneGraphDataset, create_model

df_train["labels"] = label_model.predict(L_train)

if sample:
    TRAIN_DIR = "data/VRD/sg_dataset/samples"
else:
    TRAIN_DIR = "data/VRD/sg_dataset/sg_train_images"

dl_train = DictDataLoader(
    SceneGraphDataset("train_dataset", "train", TRAIN_DIR, df_train),
    batch_size=16,
    shuffle=True,
)

dl_valid = DictDataLoader(
    SceneGraphDataset("valid_dataset", "valid", TRAIN_DIR, df_valid),
    batch_size=16,
LFAnalysis(L=L_train, lfs=lfs).lf_summary()

# In[12]:

print(f"Training set coverage: {100 * LFAnalysis(L_train).label_coverage():0.1f}%")

# In[15]:

from snorkel.labeling import LabelModel

# Train LabelModel.
label_model = LabelModel(cardinality=2, verbose=True)
label_model.fit(L_train, n_epochs=150, seed=125, log_freq=30, l2=0.1, lr=0.01)

# In[16]:

label = label_model.predict(L_train)

# In[18]:

len(label)

# In[ ]:

with open('flabel.pkl', 'wb') as f:
    pickle.dump(label, f)
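# In[ ]:

# Round-trip sanity check (illustrative addition, not in the original script):
with open('flabel.pkl', 'rb') as f:
    label_loaded = pickle.load(f)
assert (label_loaded == label).all()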