示例#1
0
    def label_spread(self, X_train, y_train, gamma = None, max_iter = None):
        """
        Train a Label Spreading model (rbf kernel) from scikit-learn.

        Parameters
        ----------
        X_train: Scaled training data
        y_train: Class label
        gamma: Parameter for rbf kernel. ``None`` keeps the scikit-learn
            default.
        max_iter: Maximum number of iterations allowed. ``None`` keeps the
            scikit-learn default.

        Returns
        -------
        Tuple ``(predicted_labels, predicted_proba)``: the fitted model's
        ``transduction_`` and its ``predict_proba`` output for ``X_train``.
        """
        # Forward only the values the caller actually supplied. Handing None
        # to LabelSpreading for gamma / max_iter breaks the fit, because the
        # estimator uses both as numbers.
        overrides = {}
        if gamma is not None:
            overrides['gamma'] = gamma
        if max_iter is not None:
            overrides['max_iter'] = max_iter
        model = LabelSpreading(kernel='rbf', n_jobs=-1, **overrides)

        # Fit the training set
        model.fit(X_train, y_train)

        # Labels assigned to every training row by the fitted model
        predicted_labels = model.transduction_

        # Class probabilities for the same rows
        predicted_proba = model.predict_proba(X_train)
        return predicted_labels, predicted_proba
示例#2
0
    def source_to_target_label_prop(self,
                                    train_feat_space='embeds',
                                    kernel_param={
                                        'type': 'rbf',
                                        'gamma': 20
                                    }):
        """Propagate source labels to the target samples with LabelSpreading.

        Source and target features are stacked (source first), every target
        row is marked unlabelled (-1), and a default ``LabelSpreading`` model
        is fitted on the stack. The transduced labels are written to the
        ``<train_feat_space>Space_prop_pred`` column of ``self.source_data``
        and ``self.target_data``.

        Parameters
        ----------
        train_feat_space : str
            Which features to use: ``'encoded'``, ``'embeds'`` or
            ``'embeds_tsne'``.
        kernel_param : dict
            Accepted for interface compatibility; the body does not read it.

        Raises
        ------
        NotImplementedError
            If ``train_feat_space`` is not one of the supported values.
        """
        separator = (
            '-----------------------------------------------------------------------'
        )
        print(separator)
        print('Propagating labels from source to target in {0} space'.format(
            train_feat_space))
        if train_feat_space == 'encoded':
            # Encoded representations are computed on demand.
            if not hasattr(self, 'source_encoded_reps'):
                self.dim_red_autoencode()
            concat_embs = np.concatenate(
                (self.source_encoded_reps, self.target_encoded_reps))
        elif train_feat_space == 'embeds':
            concat_embs = np.concatenate(
                (self.source_embds_vec, self.target_embds_vec))
        elif train_feat_space == 'embeds_tsne':
            if self.tsne_computed == 0:
                self.compute_tsne()
            feat_cols = [
                'embeds_tsne_' + str(idx)
                for idx in range(self.n_tsne_components)
            ]
            # ``.values`` replaces DataFrame.as_matrix(), which was removed
            # from pandas in 1.0.
            source_data_feats = self.source_data[feat_cols].values
            target_data_feats = self.target_data[feat_cols].values
            concat_embs = np.concatenate(
                (source_data_feats, target_data_feats))
        else:
            # ``NotImplemented`` is a comparison sentinel, not an exception.
            raise NotImplementedError(
                'Unsupported feature space: {0}'.format(train_feat_space))
        # -1 marks a sample as unlabelled for LabelSpreading.
        unknown_labels = np.ones_like(self.target_labels) * -1
        label_prop_train_labels = np.concatenate(
            (self.source_labels, unknown_labels))
        lp_model = LabelSpreading()
        lp_model.fit(concat_embs, label_prop_train_labels)
        transduction_labels = lp_model.transduction_
        label_distributions = lp_model.label_distributions_

        print(label_distributions[0:10, :])
        # Source rows come first in the stacked array, target rows after.
        pred_col = train_feat_space + 'Space_prop_pred'
        self.source_data[pred_col] = transduction_labels[:self.n_source]
        self.target_data[pred_col] = transduction_labels[self.n_source:]

        if self.inter_save:
            print('Saving propagated labels')
            self.save_perforamance(self.serving_dir, suffix=self.save_suffix)

        # Format the message before printing: print() returns None, so
        # calling .format on its result fails under Python 3.
        print('Completed source to target label propagation in {0} space'
              .format(train_feat_space))
        print(separator)
示例#3
0
def test_generate_algokey():
    """Check the key string ``generate_algokey`` builds for five estimators."""
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import AdaBoostClassifier
    from sklearn.semi_supervised import LabelSpreading

    n_weaks, tree_depth, K = 50, 3, 6

    # Each estimator is listed next to the key it is expected to produce.
    cases = [
        (RandomForestClassifier(n_estimators=n_weaks, max_depth=tree_depth),
         'RandomForest_M50_D3'),
        (KNeighborsClassifier(n_neighbors=K),
         '6NearestNeighbors'),
        (DecisionTreeClassifier(max_depth=tree_depth),
         'DecisionTree_D3'),
        (AdaBoostClassifier(
            base_estimator=DecisionTreeClassifier(max_depth=tree_depth),
            n_estimators=n_weaks,
        ),
         'AdaBoost_M50_D3'),
        (LabelSpreading(kernel="knn"),
         'LabelSpreading_knn'),
    ]

    produced = []
    for estimator, _ in cases:
        produced.append(generate_algokey(estimator))
    expected = [key for _, key in cases]

    assert produced == expected
示例#4
0
def test_LabelSpreading_rbf(*data):
    """Plot LabelSpreading (rbf kernel) score against gamma, one curve per alpha.

    :param data: three positional values ``(x, y, unlabeled_indices)``: the
        samples, their true labels, and the indices whose labels are hidden
        (set to -1) during fitting.
    :return: None; the figure is displayed with ``plt.show()``.
    """
    x, y, unlabeled_indices = data
    y_train = np.copy(y)
    y_train[unlabeled_indices] = -1
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    # LabelSpreading only accepts alpha strictly inside (0, 1); the last grid
    # point (1.0) is capped at 0.99 so the final curve can be fitted too.
    alphas = np.minimum(np.linspace(0.01, 1, num=10, endpoint=True), 0.99)
    gammas = np.logspace(-2, 2, num=50)
    # Colour set: each curve gets its own colour.
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5),
              (0.5, 0, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4),
              (0.5, 0.3, 0.2))
    # Train and plot.
    for alpha, color in zip(alphas, colors):
        scores = []
        for gamma in gammas:
            clf = LabelSpreading(max_iter=100, gamma=gamma, alpha=alpha, kernel='rbf')
            clf.fit(x, y_train)
            scores.append(clf.score(x[unlabeled_indices], y[unlabeled_indices]))
        ax.plot(gammas, scores, label=r"$\alpha=%s$" % alpha, color=color)
    # Configure the figure.
    ax.set_xlabel(r"$\gamma$")
    ax.set_ylabel("score")
    ax.set_xscale("log")
    ax.legend(loc='best')
    ax.set_title("LabelSpreading rbf kernel")
    plt.show()
示例#5
0
def semiLabelSpreding(feature_extractor, generator, val_generator, kernel,
                      neighbors, gamma, alpha):
    """Fit LabelSpreading on extracted features and predict the validation set.

    Features for ``generator`` are obtained from
    ``feature_extractor.predict_generator``. Samples whose filename starts
    with ``'N'`` are treated as unlabelled (-1) when fitting.

    :param feature_extractor: object exposing ``predict_generator``.
    :param generator: training generator exposing ``samples``,
        ``batch_size``, ``classes`` and ``filenames``.
    :param val_generator: validation generator exposing ``samples`` and
        ``batch_size``.
    :param kernel: forwarded to ``LabelSpreading(kernel=...)``.
    :param neighbors: forwarded as ``n_neighbors``.
    :param gamma: forwarded as ``gamma``.
    :param alpha: forwarded as ``alpha``.
    :return: classes predicted for the validation features.
    """
    import math

    semi = LabelSpreading(kernel=kernel,
                          n_neighbors=neighbors,
                          gamma=gamma,
                          alpha=alpha,
                          tol=0.001,
                          max_iter=1000000)

    # The step count must be a whole number; round up so the last partial
    # batch is still read.
    train_steps = int(math.ceil(generator.samples / generator.batch_size))
    features = feature_extractor.predict_generator(generator,
                                                   steps=train_steps,
                                                   verbose=1)

    # Work on a copy: writing -1 into generator.classes directly would
    # corrupt the generator's own labels for any later use.
    classes = generator.classes.copy()

    for i in range(generator.samples):
        if generator.filenames[i][0] == 'N':
            classes[i] = -1

    semi.fit(features, classes)

    val_steps = int(
        math.ceil(val_generator.samples / val_generator.batch_size))
    val_features = feature_extractor.predict_generator(val_generator,
                                                       steps=val_steps,
                                                       verbose=1)
    predicted_classes = semi.predict(val_features)

    return predicted_classes
示例#6
0
def testLabelPropagation():
    """Fit LabelSpreading (knn kernel) on document profiles and print the result.

    Documents from the first iterator keep their (encoded) first learned
    category as label; documents from the second iterator are marked
    unlabelled (-1). After fitting, every propagated label is printed next to
    the document's original first learned category.
    """
    from sklearn.semi_supervised import LabelSpreading
    from sklearn import preprocessing
    label_enc = preprocessing.LabelEncoder()

    label_prop_model = LabelSpreading(kernel='knn')
    train_iter = getDocumentIterator1("published = 0 and is_test = 1")
    validation_iter = getDocumentIterator1("published = 1 and is_test = 1")
    semantic_model = gensim_tests.SemanticModel.load(
        'gensim/full_corpus_300000')
    all_profiles, labels = [], []
    propagation_labels = []
    for doc in train_iter:
        all_profiles.append(semantic_model.inferProfile(doc.rawtext))
        labels.append(doc.learned_category[0])
        propagation_labels.append(doc.learned_category[0])

    # Encode the category names as integers before adding the -1 markers.
    label_enc.fit(propagation_labels)
    propagation_labels = label_enc.transform(propagation_labels).tolist()

    for doc in validation_iter:
        all_profiles.append(semantic_model.inferProfile(doc.rawtext))
        labels.append(doc.learned_category[0])
        propagation_labels.append(-1)
    # print() calls replace the Python 2 print statements, which do not
    # parse under Python 3.
    print(propagation_labels)
    print("Fitting")
    label_prop_model.fit(all_profiles, propagation_labels)
    output_labels = label_prop_model.transduction_
    for propagated, orig in zip(label_enc.inverse_transform(output_labels),
                                labels):
        print("{0} {1}".format(propagated, orig))
示例#7
0
def test_LabelSpreading_knn(*data):
    """Plot LabelSpreading (knn kernel) score against k, one curve per alpha.

    :param data: three positional values ``(x, y, unlabeled_indices)``: the
        samples, their true labels, and the indices whose labels are hidden
        (set to -1) during fitting.
    :return: None; the figure is displayed with ``plt.show()``.
    """
    x, y, unlabeled_indices = data
    y_train = np.copy(y)
    y_train[unlabeled_indices] = -1
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    # LabelSpreading only accepts alpha strictly inside (0, 1); the last grid
    # point (1.0) is capped at 0.99 so the final curve can be fitted too.
    alphas = np.minimum(np.linspace(0.01, 1, num=10, endpoint=True), 0.99)
    Ks = [1, 2, 3, 4, 5, 8, 10, 15, 20, 25, 30, 35, 40, 50]
    # Colour set: each curve gets its own colour.
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5),
              (0.5, 0, 0.5), (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4),
              (0.5, 0.3, 0.2))
    # Train and plot.
    for alpha, color in zip(alphas, colors):
        scores = []
        for K in Ks:
            clf = LabelSpreading(max_iter=100, n_neighbors=K, alpha=alpha, kernel='knn')
            clf.fit(x, y_train)
            scores.append(clf.score(x[unlabeled_indices], y[unlabeled_indices]))
        ax.plot(Ks, scores, label=r"$\alpha=%s$" % alpha, color=color)
    # Configure the figure.
    ax.set_xlabel(r"k")
    ax.set_ylabel("score")
    ax.legend(loc='best')
    ax.set_title("LabelSpreading knn kernel")
    plt.show()
示例#8
0
def computeSimilarities2(vect, matrix_values, numLine, numRan=10):
    """Combine the predictions of several LabelSpreading fits on random samples.

    Args:
        vect: the answer vector (value 0 for false links and 1 for true links).
        matrix_values: descriptor matrix i.e. all the probability of all ir models.
        numLine: number of pairs of artefacts; forwarded to ``computeRandom``.
        numRan: how many random samples are drawn and fitted (default 10).

    Returns:
        preds: the value returned by ``vote`` for the per-run probabilities
        of the second class (column 1 of ``predict_proba``).
    """
    spreader = LabelSpreading()
    per_run_probabilities = []

    for _ in range(numRan):
        # Draw a random sample and refit the same estimator on it.
        sampled_vect, sampled_matrix = computeRandom(vect, matrix_values,
                                                     numLine)
        spreader.fit(sampled_matrix, sampled_vect)
        print("new predicted function computed")

        # Score every pair of the full matrix with the freshly fitted model.
        probabilities = spreader.predict_proba(matrix_values)
        per_run_probabilities.append(probabilities[:, 1])

    # Majority vote across the runs.
    preds = vote(per_run_probabilities, len(vect), numRan)
    print(preds)
    return preds
示例#9
0
    def train(self, inputs, targets, min_=0.01, max_=30, niter=10, stepsize=0.1):
        """
        Fit the label-spreading model on the given data.

        Parameters
        ----------
        inputs : nd-array
            independent variables
        targets : vector
            dependent variable
        min_ : float
            forwarded to ``self.optimize`` (presumably the lower bound of
            the gamma search -- confirm against ``optimize``)
        max_ : float
            forwarded to ``self.optimize`` (presumably the upper bound of
            the gamma search -- confirm against ``optimize``)
        niter : int
            number of training iterations, forwarded to ``self.optimize``
        stepsize : float
            forwarded to ``self.optimize``
        """
        # Keep the data on the instance; the optimiser is called without it.
        self.x, self.y = inputs, targets

        # The first element of the optimiser's result becomes gamma.
        optimum = self.optimize(min_, max_, niter, stepsize)
        self.gamma = optimum[0]

        # Build the estimator with the tuned gamma, then fit it.
        self.model = LabelSpreading(
            kernel=self.kernel,
            alpha=self.alpha,
            gamma=self.gamma,
        )
        self.model.fit(self.x, self.y)

        if not self.use_logger:
            return
        self.logger.info("Label Propagation model trained with {} samples".format(len(self.y)))
示例#10
0
def run_lp_bow_runtime_vocabulary(nbr, str_list, neighbors, n_runs=10):
    """Average LabelSpreading (knn, bag-of-words) scores over repeated runs.

    Each run rebuilds the dataset, vectorises it with the runtime vocabulary,
    fits ``LabelSpreading(kernel='knn')`` and scores it on the test split.
    The averaged macro-F1 and accuracy are appended to ``str_list`` and
    printed.

    :param nbr: forwarded to ``dataset.split_train_true``.
    :param str_list: list that receives the two summary strings.
    :param neighbors: ``n_neighbors`` for the knn kernel.
    :param n_runs: number of repetitions to average over (must be >= 1).
    """
    total_f1 = 0
    total_accuracy = 0
    for _ in range(n_runs):
        dataset = Dataset(categories)
        dataset.load_preprocessed(categories)
        dataset.split_train_true(nbr)
        print_v2_test_docs_vocabulary_labeled(categories)
        dataset.load_preprocessed_test_vocabulary_labeled_in_use(categories)
        vectorizer = CountVectorizer(vocabulary=Vocabulary.get_vocabulary(categories))
        vectors = vectorizer.fit_transform(dataset.train['data'])

        # toarray() yields a plain ndarray. todense() returns np.matrix,
        # which recent scikit-learn versions reject as estimator input.
        clf = LabelSpreading(kernel='knn', n_neighbors=neighbors).fit(
            vectors.toarray(), dataset.train['target'])
        # Densify the test matrix once and reuse it for predict and score.
        test_features = vectorizer.transform(dataset.test['data']).toarray()
        pred = clf.predict(test_features)
        total_f1 += metrics.f1_score(dataset.test['target'], pred, average='macro')
        total_accuracy += clf.score(test_features, dataset.test['target'])
    avg_accuracy = total_accuracy / n_runs
    avg_f1 = total_f1 / n_runs
    str_list.extend(["KNN BOW runtime voc Avg f1: " + str(avg_f1), "KNN BOW runtime vod Avg acc: "
                     + str(avg_accuracy)])
    print("Avg f1: " + str(avg_f1))
    print("Avg acc: " + str(avg_accuracy))
示例#11
0
def soft_clamping(kernel, xTrain, yTrain, MI=10000, k=3, g=0.6, a=0.1):
    """Fit LabelSpreading on the training set and write its stats to a CSV row.

    The class probabilities predicted for ``xTrain`` go through
    ``normalize``; each resulting row is mapped to the module-level
    ``benign`` or ``malware`` value by comparing its first two entries.
    Rows whose two entries are not strictly ordered produce no label.

    Relies on names defined elsewhere in the module: ``normalize``,
    ``stats``, ``write_csv``, ``benign``, ``malware``, ``yExpect``,
    ``day_one`` and ``rate``.

    :param kernel: kernel passed to LabelSpreading.
    :param xTrain: training samples.
    :param yTrain: training labels.
    :param MI: ``max_iter`` for LabelSpreading.
    :param k: ``n_neighbors`` for LabelSpreading.
    :param g: ``gamma`` for LabelSpreading.
    :param a: ``alpha`` for LabelSpreading.
    """
    model = LabelSpreading(
        kernel=kernel,
        n_neighbors=k,
        gamma=g,
        alpha=a,
        max_iter=MI,
        n_jobs=-1,
    )
    model.fit(xTrain, yTrain)
    probabilities = model.predict_proba(xTrain)

    labels = []
    for row in normalize(yTrain, probabilities):
        if row[0] > row[1]:
            labels.append(benign)
        elif row[0] < row[1]:
            labels.append(malware)

    (lm_to_b, lb_to_m, tp, tn, fp, fn,
     pred_day1, missed_day1) = stats(yTrain, labels, yExpect, day_one)

    # One CSV row: run identification first, then the counts from stats().
    results = ['SC', kernel, k, g, a]
    results += [lm_to_b, lb_to_m, tp, tn, fp, fn, pred_day1, missed_day1]

    write_csv('SC_CMN_5per_' + str(rate) + '.csv', results)
示例#12
0
def label(filenames, train_path='../data/train_molecules_30.mat'):
    """
    Label data with the provided filenames.

    The labelled training set is stacked on top of the unlabelled samples
    (marked -1), a default LabelSpreading model is fitted on the stack, and
    the transduced labels are returned.

    :param filenames: List of filenames containing data to label.
    :param train_path: Path of the labelled training data, passed to
        ``load_data``.
    :return: Newly labeled and conglomerate datasets, as
        ``(unlabeled_X, unlabeled_Y), (X_all, Y_all)``.
    """
    unlabeled = [scipy.io.loadmat(fname) for fname in filenames]
    unlabeled_X = np.vstack([data['X'] for data in unlabeled])
    X, Y = load_data(train_path, shape=(-1, 30, 30, 30))

    num_unlabeled = unlabeled_X.shape[0]
    # A flat label vector is what the estimator expects; -1 marks unlabelled.
    Y = Y.reshape(-1)
    num_labeled = Y.shape[0]
    Y_all = np.concatenate((Y, np.zeros(num_unlabeled) - 1))

    X_all = np.vstack((X, unlabeled_X))
    X_all = X_all.reshape((-1, 27000))

    label_prop_model = LabelSpreading()
    label_prop_model.fit(X_all, Y_all)
    Y_all = label_prop_model.transduction_
    # The unlabelled rows were stacked after the labelled ones, so they start
    # at index num_labeled (not num_unlabeled).
    unlabeled_Y = Y_all[num_labeled:]
    return (unlabeled_X, unlabeled_Y), (X_all, Y_all)
示例#13
0
 def __init__(self,
              method="spreading",
              kernel="knn",
              alpha=0.2,
              gamma=20,
              n_neighbors=7,
              **kwargs):
     """Create one label-propagation/spreading estimator per region.

     :param method: ``"propagation"`` or ``"spreading"`` (case-insensitive).
     :param kernel: kernel passed to every estimator.
     :param alpha: clamping factor; only forwarded to LabelSpreading.
     :param gamma: ``gamma`` passed to every estimator.
     :param n_neighbors: ``n_neighbors`` passed to every estimator.
     :param kwargs: forwarded to the parent constructor.
     :raises InitializationError: if ``method`` is not recognised.
     """
     super(LabSP, self).__init__(**kwargs)
     chosen = method.lower()
     if chosen == "propagation":
         # LabelPropagation has no ``alpha`` argument in current
         # scikit-learn (deprecated in 0.19, removed in 0.21); passing it
         # raises a TypeError, so it is not forwarded here.
         self.regressors = [
             LabelPropagation(kernel=kernel,
                              gamma=gamma,
                              n_neighbors=n_neighbors)
             for _ in range(len(self.regions))
         ]
     elif chosen == "spreading":
         self.regressors = [
             LabelSpreading(kernel=kernel,
                            alpha=alpha,
                            gamma=gamma,
                            n_neighbors=n_neighbors)
             for _ in range(len(self.regions))
         ]
     else:
         raise InitializationError("Method %s not valid" % method)
def label_spreading(x_train_all,
                    y_train_all,
                    cv_semisupervised,
                    kernel="knn",
                    alpha=0.2,
                    name="LabelSpreading",
                    only_model=False,
                    **kwargs):
    """Fit a LabelSpreading model (semi-supervised) and optionally evaluate it.

    Parameters:
        x_train_all (pd.DataFrame): features of both labelled and unlabelled data.
        y_train_all (pd.Series): labels of both labelled and unlabelled data. Unlabelled data must have label -1.
        cv_semisupervised (list): training/testing index tuples for the folds, passed on to ``calculate_metrics_cv``.
        kernel (str): can be either "rbf" or "knn".
        alpha (float): clamping factor, between 0 and 1.
        name (str): name/description for the model, passed on to ``calculate_metrics_cv``.
        only_model (bool): if True, return only the fitted model.
        **kwargs: accepted but not used by this function.
    Returns:
        The fitted model when ``only_model`` is True, otherwise the result of
        ``calculate_metrics_cv`` for that model.
    """
    # TODO cv: use the same cv split but randomly assign the other unlabelled data pieces to the other cv folds
    ls_model = LabelSpreading(
        kernel=kernel,
        alpha=alpha,
        n_jobs=-1,
        max_iter=100,
    )
    ls_model.fit(x_train_all, y_train_all)

    if not only_model:
        return calculate_metrics_cv(
            model=ls_model,
            X=x_train_all,
            y_true=y_train_all,
            cv=cv_semisupervised,
            name=name,
        )
    return ls_model
示例#15
0
    def predict_ssl(self, x_sup, y_sup, x_unsup, y_unsup, x_test, y_test):
        """Spread labels over encoded features, then score two linear SVMs.

        The supervised and unsupervised samples are concatenated and encoded
        with ``self.model_e``; rows after the supervised ones are marked
        unlabelled (-1) and a knn LabelSpreading model is fitted. Its
        transduced labels are used to train ``svm.SVC(kernel='linear')`` and
        ``svm.LinearSVC()``; the better test accuracy of the two is printed.
        Nothing is returned.
        """
        ls_model = LabelSpreading(kernel='knn', n_neighbors=5)

        # Indices of the rows that follow the supervised block.
        all_indices = np.arange(self.train_size)
        unlabeled_indices = all_indices[x_sup.shape[0]:]

        # Hide the labels of those rows before fitting.
        y_sup_unsup_train = np.copy(np.concatenate([y_sup, y_unsup]))
        y_sup_unsup_train[unlabeled_indices] = -1

        # Encode the stacked inputs and flatten the last two axes.
        h_fit = self.model_e.predict(np.concatenate([x_sup, x_unsup], axis=0))
        rows, dim_a, dim_b = h_fit.shape[0], h_fit.shape[1], h_fit.shape[2]
        h_fit = np.reshape(h_fit, (rows, dim_a * dim_b))
        ls_model.fit(h_fit, y_sup_unsup_train)
        y_unsup_pred = ls_model.transduction_[unlabeled_indices]

        #print("LabelSpread Accuracy is ", accuracy_score(y_unsup, y_unsup_pred))

        h_test = self.model_e.predict(x_test)
        rows, dim_a, dim_b = h_test.shape[0], h_test.shape[1], h_test.shape[2]
        h_test = np.reshape(h_test, (rows, dim_a * dim_b))

        # Train both SVM variants on the transduced labels.
        y_fit_true = ls_model.transduction_
        accuracies = []
        for clf_svc in (svm.SVC(kernel='linear'), svm.LinearSVC()):
            clf_svc.fit(h_fit, y_fit_true)
            accuracies.append(accuracy_score(y_test, clf_svc.predict(h_test)))
        print('acc_svm is ', max(accuracies))
 def doLabelSpreading(self,X,y,**kwargs):
     """Fit LabelSpreading(**kwargs) on (X, y) and return predict_proba(X).

     When ``self.verbose`` is greater than 2, the input shapes, a histogram
     of ``y`` and a histogram of the predicted labels are printed.
     """
     spreader = LabelSpreading(**kwargs)
     chatty = self.verbose > 2
     if chatty:
         print("X, y shapes: ", X.shape, y.shape)
         print(" y hist: ", np.histogram(y))
     spreader.fit(X, y)
     if chatty:
         print("ls_predict:", np.histogram(spreader.predict(X)))
     return spreader.predict_proba(X)
示例#17
0
    def label_spr(self):
        """Fit the semi-supervised model ``self.manyfit`` times and average its accuracy.

        Every round re-initialises the variables, optionally applies PCA,
        builds the estimator selected by ``self.ss_mod`` / ``self.ss_kern``,
        fits it on ``self.X_tot`` / ``self.y_tot`` and scores it on the
        validation data. After the loop, the last fitted model supplies
        ``self.y_tot`` (its transduction) and ``self.y_submit``; the mean
        accuracy is stored in ``self.json_dict['ss_accuracy']``.
        """
        RESULT_ACC_SS = 0

        for i in range(self.manyfit):

            # Initialising of variables:
            self.init_variables()

            # PCA preprocessing:
            if (self.PCA_MODE): self.pca_preprocess(self.pca)

            # Semi supervised algo
            if (self.ss_mod == 'LabSpr' and self.ss_kern == 'knn'):
                self.label_prop_model = LabelSpreading(
                    kernel='knn',
                    gamma=self.gamma,
                    n_neighbors=self.neighbors,
                    alpha=self.alpha)

            elif (self.ss_mod == 'LabProp' and self.ss_kern == 'rbf'):
                # LabelPropagation has no ``alpha`` argument in current
                # scikit-learn (removed in 0.21), so it is not passed.
                self.label_prop_model = LabelPropagation(
                    kernel='rbf',
                    gamma=self.gamma,
                    n_neighbors=self.neighbors,
                    max_iter=10)
            else:
                # The class name was misspelled here, which raised a
                # NameError whenever this branch ran.
                self.label_prop_model = LabelPropagation(
                    kernel=self.ss_kern,
                    gamma=self.gamma,
                    n_neighbors=self.neighbors)

            print('Starting to fit. Run for shelter!')

            self.label_prop_model.fit(self.X_tot, self.y_tot)

            temp_acc = self.label_prop_model.score(self.X_valid_lab,
                                                   self.y_valid)

            print('{} / {} :accuracy = {}'.format(i, self.manyfit, temp_acc))

            RESULT_ACC_SS += temp_acc

        # Only the model from the final round is used from here on.
        self.y_tot = self.label_prop_model.transduction_

        self.y_submit = self.label_prop_model.predict(self.X_submit)

        if (self.datastate == "save"):
            self.save_to_csv(self.X_tot, self.y_tot, self.X_valid_lab,
                             self.y_valid)

        RESULT_ACC_SS /= self.manyfit

        self.json_dict['ss_accuracy'] = RESULT_ACC_SS

        print('accuracy obtained on the test set of the ss algo:',
              RESULT_ACC_SS)
示例#18
0
def test_LabelSpreading(*data):
    """Print the accuracy of LabelSpreading (rbf kernel) on the hidden labels.

    :param data: three positional values ``(x, y, unlabeled_indices)``; the
        labels at ``unlabeled_indices`` are replaced by -1 before fitting.
    """
    x, y, unlabeled_indices = data

    # Hide the labels the model has to recover.
    masked_y = np.copy(y)
    masked_y[unlabeled_indices] = -1

    model = LabelSpreading(max_iter=100, kernel='rbf', gamma=0.1)
    model.fit(x, masked_y)

    recovered = model.transduction_[unlabeled_indices]
    accuracy = metrics.accuracy_score(y[unlabeled_indices], recovered)
    print("Accuracy: %f" % accuracy)
 def LabelSpreadingWrapper(X_train, y_train, X_test):
     """Label ``X_test`` transductively with LabelSpreading (knn kernel).

     The test samples are appended to the training samples as unlabelled
     rows (-1); the labels the fitted model assigns to them are returned.

     :param X_train: labelled samples.
     :param y_train: labels of ``X_train``.
     :param X_test: samples to label.
     :return: transduced labels for ``X_test``, in order.
     """
     clf = LabelSpreading(kernel='knn',
                          n_neighbors=10,
                          n_jobs=-1,
                          max_iter=1000,
                          alpha=0.1)
     n_labeled = len(y_train)
     newlabels = np.concatenate((np.array(y_train), -np.ones(len(X_test))))
     clf.fit(np.concatenate((X_train, X_test)), newlabels)
     # Slice from the end of the labelled part. A negative slice would
     # return every label when X_test is empty, because [-0:] is [0:].
     return clf.transduction_[n_labeled:]
 def label_spreading(self, X_train, y, X_test):
     """Fit LabelSpreading on train and test rows and predict the test rows.

     :param X_train: sparse matrix of training samples.
     :param y: labels for the stacked (train + test) rows.
     :param X_test: sparse matrix of test samples.
     :return: ``(final_labels, clf)``: predicted test labels and the fitted
         model.
     """
     clf = LabelSpreading()
     # toarray() gives plain ndarrays. todense() returns np.matrix, which
     # recent scikit-learn versions refuse as estimator input.
     X = np.concatenate((X_train.toarray(), X_test.toarray()), axis=0)
     print("X shape now ", X.shape)
     print("Y shape now ", y.shape)
     clf.fit(X, y)
     final_labels = clf.predict(X_test)
     label_prob = clf.predict_proba(X_test)
     print(compare_labels_probabilities().compare(label_prob, final_labels))
     return final_labels, clf
示例#21
0
def test_LabelSpreading(*data):
    """Print two accuracy figures for LabelSpreading (knn kernel).

    The first comes from ``clf.score`` on the hidden samples, the second from
    comparing the transduced labels with the true ones.

    :param data: three positional values ``(X, y, unlabeled_indices)``; the
        labels at ``unlabeled_indices`` are replaced by -1 before fitting.
    """
    X, y, unlabeled_indices = data

    # Hide the labels the model has to recover.
    masked_y = np.copy(y)
    masked_y[unlabeled_indices] = -1

    clf = LabelSpreading(max_iter=1000, kernel='knn', gamma=0.1)
    clf.fit(X, masked_y)

    hidden_truth = y[unlabeled_indices]
    recovered = clf.transduction_[unlabeled_indices]

    score_accuracy = clf.score(X[unlabeled_indices], hidden_truth)
    print('Accuracy : %f' % score_accuracy)
    transduction_accuracy = metrics.accuracy_score(hidden_truth, recovered)
    print('Accuracy : %f' % transduction_accuracy)
示例#22
0
def labelSpreading(x, y):
    """Evaluate LabelSpreading for every gamma in the module-level ``tensValues``.

    Each model is passed to ``runSet``; the collected accuracies and MSEs
    are plotted with ``showMSEGraph`` and the best accuracy is registered via
    ``addModelComparison``.
    """
    scores, mses = [], []
    for gamma in tensValues:
        model = LabelSpreading(gamma=gamma)
        accuracy, mse = runSet(model, x, y)
        scores += [accuracy]
        mses += [mse]

    # ``model`` is the estimator from the last loop iteration.
    model_name = type(model).__name__
    showMSEGraph(tensValues, scores, mses, "gamma", model_name)
    best_score = max(scores)
    addModelComparison(model, best_score)
示例#23
0
 def __init__(self):
     """Seed NumPy's global RNG and set up the model, its name and a scaler."""
     np.random.seed(1102)

     # Leave two cores free, but always use at least one.
     worker_count = max(multiprocessing.cpu_count() - 2, 1)
     self.model = LabelSpreading(kernel="rbf",
                                 n_jobs=worker_count,
                                 alpha=0.2,
                                 n_neighbors=10,
                                 max_iter=15)
     self.name = "LABEL-SPREADING"
     self.scaler = MinMaxScaler()
def semi_supervised():
    """Fit LabelSpreading (knn) on the ``gtd`` data and print 5-fold CV accuracy.

    Features and labels come from ``separate_cols_with_unknown(gtd)``; the
    features are passed through ``process_nontext`` and ``convertDType``
    before fitting.
    """
    raw_features, labels = separate_cols_with_unknown(gtd)
    features = convertDType(process_nontext(raw_features))

    # A LabelPropagation estimator is constructed but not used afterwards.
    propagation = LabelPropagation(kernel="knn")
    spreading = LabelSpreading(kernel="knn")
    spreading.fit(features, labels)

    preds = cross_val_predict(spreading, features, labels, cv=5)
    accuracy = accuracy_score(labels, preds)
    print('5 fold cross val accuracy of model: %0.2f ' % accuracy)
示例#25
0
 def __init__(self, lmnn=False, max_iter=1000, lm_num=200):
     """Set up the LabelSpreading classifier and, optionally, an LMNN learner.

     :param lmnn: when truthy, an ``LMNN`` instance is stored in ``self.ml``.
     :param max_iter: ``max_iter`` for the LabelSpreading classifier.
     :param lm_num: stored as ``self.lm_num``.
     """
     self.clf = LabelSpreading(
         kernel='knn',
         n_neighbors=25,
         max_iter=max_iter,
         alpha=0.2,
         n_jobs=-1,
     )
     self.lmnn, self.lm_num = lmnn, lm_num
     if self.lmnn:
         self.ml = LMNN(use_pca=False, max_iter=2000)
示例#26
0
    def __init__(self,
                 balanced=False,
                 visual_expansion_use=True,
                 re_score_alpha=0.15,
                 re_score_proportional=True,
                 regions=None,
                 ve_estimators=20,
                 ve_leafs=5,
                 clf_estimators=20,
                 clf_leafs=1,
                 method="spreading",
                 kernel="knn",
                 lab_alpha=0.2,
                 lab_gamma=20,
                 lab_n_neighbors=7,
                 weights=None):
        """Initialise the parent, the per-region forests and the label estimators.

        :param balanced, visual_expansion_use, re_score_alpha,
            re_score_proportional, regions, ve_estimators, ve_leafs:
            forwarded unchanged to the parent constructor.
        :param clf_estimators: ``n_estimators`` of each RandomTreesEmbedding.
        :param clf_leafs: ``min_samples_leaf`` of each RandomTreesEmbedding.
        :param method: ``"propagation"`` or ``"spreading"`` (case-insensitive).
        :param kernel: kernel for the label estimators.
        :param lab_alpha: clamping factor; only forwarded to LabelSpreading.
        :param lab_gamma: ``gamma`` for the label estimators.
        :param lab_n_neighbors: ``n_neighbors`` for the label estimators.
        :param weights: stored as ``self.weights``; a falsy value selects
            ``[0.5, 0.5, 0.]``.
        :raises InitializationError: if ``method`` is not recognised.
        """
        super(SAL, self).__init__(balanced=balanced,
                                  visual_expansion_use=visual_expansion_use,
                                  re_score_alpha=re_score_alpha,
                                  re_score_proportional=re_score_proportional,
                                  regions=regions,
                                  ve_estimators=ve_estimators,
                                  ve_leafs=ve_leafs)
        if not weights:
            self.weights = [0.5, 0.5, 0.]
        else:
            self.weights = weights

        # One embedding forest per region.
        self.cluster_forest = [
            RandomTreesEmbedding(n_estimators=clf_estimators,
                                 min_samples_leaf=clf_leafs,
                                 n_jobs=-1) for _ in range(len(self.regions))
        ]
        self.affinity_matrix = []
        self.feature_matcher = None  # Initialized when set_ex
        chosen = method.lower()
        if chosen == "propagation":
            # LabelPropagation has no ``alpha`` argument in current
            # scikit-learn (deprecated in 0.19, removed in 0.21); passing it
            # raises a TypeError, so it is not forwarded here.
            self.regressors = [
                LabelPropagation(kernel=kernel,
                                 gamma=lab_gamma,
                                 n_neighbors=lab_n_neighbors)
                for _ in range(len(self.regions))
            ]
        elif chosen == "spreading":
            self.regressors = [
                LabelSpreading(kernel=kernel,
                               alpha=lab_alpha,
                               gamma=lab_gamma,
                               n_neighbors=lab_n_neighbors)
                for _ in range(len(self.regions))
            ]
        else:
            raise InitializationError("Method %s not valid" % method)
示例#27
0
    def get_clf(self):
        """Build the classifier selected by ``self.clf_method``.

        Returns a ``(clf, semi_supervised)`` pair: ``'LR'`` gives a
        LogisticRegression with ``semi_supervised=False``, ``'LP'`` gives a
        LabelSpreading with ``semi_supervised=True``. Any other value raises
        ValueError.
        """
        method = self.clf_method
        if method == 'LR':
            return LogisticRegression(max_iter=200), False
        if method == 'LP':
            return LabelSpreading(), True
        raise ValueError("Non-implemented method")
示例#28
0
    def fit(self):
        """Fit a knn LabelSpreading model on validation plus training rows.

        The validation rows come first and carry labels computed as
        ``(self.val_ground + 1) / 2``; the training rows follow and are all
        marked unlabelled (-1). The fitted model is stored in ``self.model``.
        """
        stacked_rows = (self.val_primitive_matrix, self.train_primitive_matrix)
        X = np.concatenate(stacked_rows)

        # Known labels for the validation part, -1 for every training row.
        known = (self.val_ground + 1) / 2.
        n_train = np.shape(self.train_primitive_matrix)[0]
        unknown = -1. * np.ones(n_train)
        y = np.concatenate((known, unknown))

        self.model = LabelSpreading(kernel='knn')
        self.model.fit(X, y)
示例#29
0
def knn(X, labels):
    """Return the labels a knn LabelSpreading model assigns to the rows of X.

    The model is built with ``alpha=0.6`` and ``max_iter=100``, fitted on
    ``(X, labels)``, and its ``transduction_`` attribute is returned.
    """
    spreader = LabelSpreading(kernel='knn', alpha=0.6, max_iter=100)
    spreader.fit(X, labels)
    return spreader.transduction_
def propagate_labels(
    features,
    labels,
):
    """Fit LabelSpreading with the ``construct_graph`` kernel and return probabilities.

    The fitted model's ``classes_`` are written to the debug log; the return
    value is ``predict_proba(features)``.
    """
    spreader = LabelSpreading(kernel=construct_graph, n_jobs=-1)
    spreader.fit(features, labels)
    logger.debug(spreader.classes_)
    return spreader.predict_proba(features)