Пример #1
0
def test_fit():
    """Test for fitting the model."""
    X = rnd.randn(10, 2)
    y = np.hstack((-np.ones((5, )), np.ones((5, ))))
    Z = rnd.randn(10, 2) + 1
    clf = SubspaceAlignedClassifier()
    clf.fit(X, y, Z)
    assert clf.is_trained
Пример #2
0
def test_subspace_alignment():
    """Test the alignment between datasets."""
    X = rnd.randn(100, 10)
    Z = np.dot(rnd.randn(100, 10), np.diag(np.arange(1, 11)))
    clf = SubspaceAlignedClassifier()
    V, CX, CZ = clf.subspace_alignment(X, Z, num_components=3)
    assert not np.any(np.isnan(V))
    assert CX.shape[1] == 3
    assert CZ.shape[1] == 3
Пример #3
0
def main(source, target, model, target_train_ratio, random_state):

    params = {
        'source': source,
        'target': target,
        'target_train_ratio': target_train_ratio,
        'max_features': 5000,
        'random_state': random_state
    }

    params['partition'] = 'tr'
    tr_X, tr_y = get_data(AmazonDatasetCombined(**params))

    params['partition'] = 'te'
    te_X, te_y = get_data(AmazonDatasetCombined(**params))

    tr_y = tr_y.reshape(-1)
    te_y = te_y.reshape(-1)

    if model == 'lr':
        C = 0.2
        clf = LogisticRegression(solver='lbfgs', max_iter=1000, C=C)
        clf.fit(tr_X, tr_y)

    elif model == 'svm':
        C = 0.2
        clf = LinearSVC(C=C)
        clf.fit(tr_X, tr_y)

    elif model == 'kmm':
        clf = ImportanceWeightedClassifier(iwe='kmm')
        clf.fit(tr_X, tr_y, te_X)

    elif model == 'suba-lr':
        clf = SubspaceAlignedClassifier(loss='logistic')
        clf.fit(tr_X, tr_y, te_X)

    elif model == 'suba-hi':
        clf = SubspaceAlignedClassifier(loss='hinge')
        clf.fit(tr_X, tr_y, te_X)

    elif model == 'tca-lr':
        clf = TransferComponentClassifier(loss='logistic')
        clf.fit(tr_X, tr_y, te_X)

    elif model == 'tca-hi':
        clf = TransferComponentClassifier(loss='hinge')
        clf.fit(tr_X, tr_y, te_X)

    else:
        raise Exception('Unknown model called..')

    tr_score = accuracy_score(tr_y, clf.predict(tr_X))
    te_score = accuracy_score(te_y, clf.predict(te_X))

    return tr_score, te_score
Пример #4
0
def test_predict():
    """Test for making predictions."""
    X = rnd.randn(10, 2)
    y = np.hstack((-np.ones((5, )), np.ones((5, ))))
    Z = rnd.randn(10, 2) + 1
    clf = SubspaceAlignedClassifier()
    clf.fit(X, y, Z)
    u_pred = clf.predict(Z)
    labels = np.unique(y)
    assert len(np.setdiff1d(np.unique(u_pred), labels)) == 0
Пример #5
0
def apply_ENSEMBLE(trainX, trainY, testX, testY, window, source_pos, target_pos):
    classifier_SA_DT = SubspaceAlignedClassifier(loss="dtree")
    classifier_SA_LR = SubspaceAlignedClassifier(loss="logistic")
    classifier_SA_NB = SubspaceAlignedClassifier(loss="berno")
    classifier_TCA_DT = TransferComponentClassifier(loss="dtree")
    classifier_TCA_LR = TransferComponentClassifier(loss="logistic")
    classifier_TCA_NB = TransferComponentClassifier(loss="berno")
    classifier_NN_DT = ImportanceWeightedClassifier(iwe='nn', loss="dtree")
    classifier_NN_LR = ImportanceWeightedClassifier(iwe='nn', loss="logistic")
    classifier_NN_NB = ImportanceWeightedClassifier(iwe='nn', loss="berno")
    classifier_KMM_DT = ImportanceWeightedClassifier(iwe='kmm', loss="dtree")
    classifier_KMM_LR = ImportanceWeightedClassifier(iwe='kmm', loss="logistic")
    classifier_KMM_NB = ImportanceWeightedClassifier(iwe='kmm', loss="berno")
    #
    eclf = EnsembleClassifier(clfs=[ 
        #classifier_SA_DT,
        #classifier_SA_LR,
        #classifier_SA_NB,

        #classifier_TCA_DT,
        #classifier_TCA_LR,
        classifier_TCA_NB,

        classifier_NN_DT,
        #classifier_NN_LR,
        #classifier_NN_NB,

        classifier_KMM_DT,
        classifier_KMM_LR,
        #classifier_KMM_NB
         ])
    eclf.fit(trainX, trainY, testX)
    pred = eclf.predict(testX)
    acc_ENSEMBLE, acc_ENSEMBLE_INFO = check_accuracy(testY, pred)
    #
    return pd.DataFrame(
        [{ 
        'window': window,
        'source_position': source_pos,
        'target_position': target_pos,

        'acc_ENSEMBLE': acc_ENSEMBLE,  
        'acc_ENSEMBLE_INFO': acc_ENSEMBLE_INFO,                                            
        }]
    )
Пример #6
0
def model_build(classifier, trian_features, train_labels, test_features):
    if classifier == "IW":
        # Call an importance-weighted classifier
        # pipe = make_pipeline(StandardScaler(), ImportanceWeightedClassifier(iwe='nn'))
        # param_grid = [{}]
        # model = GridSearchCV(pipe, param_grid, cv=3)
        # model.fit(trian_features, train_labels, test_features)
        # print(model.best_params_)
        model = ImportanceWeightedClassifier(iwe='kmm')
        model.fit(preprocessing.scale(trian_features), train_labels,
                  preprocessing.scale(test_features))

    elif classifier == "SUBA":
        # Classifier based on subspace alignment
        model = SubspaceAlignedClassifier(loss_function='logistic')
        model.fit(preprocessing.scale(trian_features), train_labels,
                  preprocessing.scale(test_features))

    elif classifier == "TCPR":
        # Target Contrastive Pessimistic Classifier
        model = TargetContrastivePessimisticClassifier(l2=0.1)
        model.fit(preprocessing.scale(trian_features), train_labels,
                  preprocessing.scale(test_features))

    else:
        if classifier == "LR":
            pipe = make_pipeline(StandardScaler(), LogisticRegression())
            param_grid = [{'logisticregression__C': [1, 10, 100]}]
        elif classifier == "SVM":
            # pipe = make_pipeline(StandardScaler(), LinearSVC(random_state=0, tol=1e-5)) SVC(kernel='linear',probability=True)
            pipe = make_pipeline(StandardScaler(),
                                 SVC(kernel='linear', probability=True))
            param_grid = [{'svc__C': [0.01, 0.1, 1]}]
        elif classifier == "RF":
            pipe = make_pipeline(StandardScaler(),
                                 RandomForestClassifier(max_features='sqrt'))
            param_grid = {
                'randomforestclassifier__n_estimators': range(230, 300, 10),
                'randomforestclassifier__max_depth': range(8, 12, 1),
                'randomforestclassifier__min_samples_leaf': range(1, 5, 1),
                'randomforestclassifier__max_features': range(1, 20, 1)

                # 'learning_rate': np.linspace(0.01, 2, 20),
                # 'subsample': np.linspace(0.7, 0.9, 20),
                # 'colsample_bytree': np.linspace(0.5, 0.98, 10),
                # 'min_child_weight': range(1, 9, 1)
            }
        model = GridSearchCV(pipe, param_grid, cv=3)
        model.fit(trian_features, train_labels)
        print(model.best_params_)
    # save the model
    model_file_name = classifier + time.strftime("%m%d-%H%M%S") + ".model"
    joblib.dump(filename=model_file_name, value=model)
    return model
Пример #7
0
def apply_SA(trainX, trainY, testX, testY, window, source_pos, target_pos):
    # Decision Tree
    print("\n Subspace Alignment (Fernando et al., 2013) ")
    classifier = SubspaceAlignedClassifier(loss="dtree")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_SA, acc_DT_SA_INFO = check_accuracy(testY, pred_naive)
    # Logistic Regression
    print("\n Subspace Alignment (Fernando et al., 2013) ")
    classifier = SubspaceAlignedClassifier(loss="logistic")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_SA, acc_LR_SA_INFO = check_accuracy(testY, pred_naive)
    # Naive Bayes Bernoulli        
    print("\n Subspace Alignment (Fernando et al., 2013) ")
    classifier = SubspaceAlignedClassifier(loss="berno")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_SA, acc_NB_SA_INFO = check_accuracy(testY, pred_naive)
    #
    return pd.DataFrame(
            [{ 
            'window': window,
            'source_position': source_pos,
            'target_position': target_pos,

            'acc_LR_SA': acc_LR_SA,
            'acc_LR_SA_INFO': str(acc_LR_SA_INFO),                                              
            'acc_DT_SA': acc_DT_SA,
            'acc_DT_SA_INFO': str(acc_DT_SA_INFO),                                                                            
            'acc_NB_SA': acc_NB_SA,
            'acc_NB_SA_INFO': str(acc_NB_SA_INFO),                                                
            }]
        )
Пример #8
0
def build_models(trainX, trainY, testX, testY, source_pos, target_pos, window):
    #######################
    ### SEMI-SUPERVISED ###
    ########################
    # Label Propagation
    label_prop_model = LabelPropagation(kernel='knn')
    label_prop_model.fit(trainX, trainY)
    Y_Pred = label_prop_model.predict(testX)
    acc_ss_propagation, acc_ss_propagation_INFO = checkAccuracy(testY, Y_Pred)
    # Label Spreading
    label_prop_models_spr = LabelSpreading(kernel='knn')
    label_prop_models_spr.fit(trainX, trainY)
    Y_Pred = label_prop_models_spr.predict(testX)
    acc_ss_spreading, acc_ss_spreading_INFO = checkAccuracy(testY, Y_Pred)
    ########################
    #### WITHOUT TL ########
    ########################
    # LogisticRegression
    modelLR = LogisticRegression()
    modelLR.fit(trainX, trainY)
    predLR = modelLR.predict(testX)
    accLR, acc_LR_INFO = checkAccuracy(testY, predLR)
    # DecisionTreeClassifier
    modelDT = tree.DecisionTreeClassifier()
    modelDT.fit(trainX, trainY)
    predDT = modelDT.predict(testX)
    accDT, acc_DT_INFO = checkAccuracy(testY, predDT)
    # BernoulliNB
    modelNB = BernoulliNB()
    modelNB.fit(trainX, trainY)
    predND = modelNB.predict(testX)
    accNB, acc_NB_INFO = checkAccuracy(testY, predND)
    #
    print("WITHOUT TL ACC_LR:", accLR, " ACC_DT:", accDT, " ACC_NB:", accNB)
    ########################
    #### WITH TL ########
    ########################

    ####################################################
    ### Kernel Mean Matching (Huang et al., 2006)
    ###
    # Decision Tree
    print("\n Kernel Mean Matching (Huang et al., 2006) ")
    classifier = ImportanceWeightedClassifier(iwe='kmm', loss="dtree")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_KMM, acc_DT_KMM_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_DT_KMM)
    # Logistic Regression
    classifier = ImportanceWeightedClassifier(iwe='kmm', loss="logistic")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_KMM, acc_LR_KMM_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_LR_KMM)
    # Naive Bayes Bernoulli
    classifier = ImportanceWeightedClassifier(iwe='kmm', loss="berno")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_KMM, acc_NB_KMM_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_NB_KMM)
    ####################################################
    ### Nearest-neighbour-based weighting (Loog, 2015)
    ###
    # Decision Tree
    print("\n Nearest-neighbour-based weighting (Loog, 2015)    ")
    classifier = ImportanceWeightedClassifier(iwe='nn', loss="dtree")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_NN, acc_DT_NN_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_DT_NN)
    # Logistic Regression
    print("\n Nearest-neighbour-based weighting (Loog, 2015)    ")
    classifier = ImportanceWeightedClassifier(iwe='nn', loss="logistic")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_NN, acc_LR_NN_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_LR_NN)
    # Naive Bayes Bernoulli
    print("\n Nearest-neighbour-based weighting (Loog, 2015)    ")
    classifier = ImportanceWeightedClassifier(iwe='nn', loss="berno")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_NN, acc_NB_NN_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_NB_NN)

    ####################################################
    ### Transfer Component Analysis (Pan et al, 2009)
    ###
    # Decision Tree
    print("\n Transfer Component Analysis (Pan et al, 2009)")
    classifier = TransferComponentClassifier(loss="dtree", num_components=6)
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_TCA, acc_DT_TCA_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_DT_TCA)
    # Logistic Regression
    classifier = TransferComponentClassifier(loss="logistic", num_components=6)
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_TCA, acc_LR_TCA_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_LR_TCA)
    # Naive Bayes Bernoulli
    classifier = TransferComponentClassifier(loss="berno", num_components=6)
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_TCA, acc_NB_TCA_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_NB_TCA)

    ####################################################
    ### Subspace Alignment (Fernando et al., 2013)
    ###
    # Decision Tree
    print("\n Subspace Alignment (Fernando et al., 2013) ")
    classifier = SubspaceAlignedClassifier(loss="dtree")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_SA, acc_DT_SA_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_DT_SA)
    # Logistic Regression
    print("\n Subspace Alignment (Fernando et al., 2013) ")
    classifier = SubspaceAlignedClassifier(loss="logistic")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_SA, acc_LR_SA_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_LR_SA)
    # Naive Bayes Bernoulli
    print("\n Subspace Alignment (Fernando et al., 2013) ")
    classifier = SubspaceAlignedClassifier(loss="berno")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_SA, acc_NB_SA_INFO = checkAccuracy(testY, pred_naive)
    print("ACC:", acc_NB_SA)
    #################################
    ############# ENSEMBLE ##########
    #################################
    classifier_SA_DT = SubspaceAlignedClassifier(loss="dtree")
    classifier_SA_LR = SubspaceAlignedClassifier(loss="logistic")
    classifier_SA_NB = SubspaceAlignedClassifier(loss="berno")
    classifier_TCA_DT = TransferComponentClassifier(loss="dtree")
    classifier_TCA_LR = TransferComponentClassifier(loss="logistic")
    classifier_TCA_NB = TransferComponentClassifier(loss="berno")
    classifier_NN_DT = ImportanceWeightedClassifier(iwe='nn', loss="dtree")
    classifier_NN_LR = ImportanceWeightedClassifier(iwe='nn', loss="logistic")
    classifier_NN_NB = ImportanceWeightedClassifier(iwe='nn', loss="berno")
    classifier_KMM_DT = ImportanceWeightedClassifier(iwe='kmm', loss="dtree")
    classifier_KMM_LR = ImportanceWeightedClassifier(iwe='kmm',
                                                     loss="logistic")
    classifier_KMM_NB = ImportanceWeightedClassifier(iwe='kmm', loss="berno")
    #
    eclf = EnsembleClassifier(
        clfs=[classifier_TCA_DT, classifier_NN_DT, classifier_KMM_DT])
    eclf.fit(trainX, trainY, testX)
    pred = eclf.predict_v2(testX)
    acc_ENSEMBLE, acc_ENSEMBLE_INFO = checkAccuracy(testY, pred)

    ########################
    #### RETURN ########
    ########################
    return pd.DataFrame([{
        'window': window,
        'source_position': source_pos,
        'target_position': target_pos,
        'acc_SS_propagation': acc_ss_propagation,
        'acc_SS_propagation_INFO': acc_ss_propagation_INFO,
        'acc_SS_spreading': acc_ss_spreading,
        'acc_SS_spreading_INFO': acc_ss_spreading_INFO,
        'acc_ENSEMBLE': acc_ENSEMBLE,
        'acc_LR': accLR,
        'acc_LR_INFO': str(acc_LR_INFO),
        'acc_DT': accDT,
        'acc_DT_INFO': str(acc_DT_INFO),
        'acc_NB': accNB,
        'acc_NB_INFO': str(acc_NB_INFO),
        'acc_LR_KMM': acc_LR_KMM,
        'acc_LR_KMM_INFO': str(acc_LR_KMM_INFO),
        'acc_LR_NN': acc_LR_NN,
        'acc_LR_NN_INFO': str(acc_LR_NN_INFO),
        'acc_LR_TCA': acc_LR_TCA,
        'acc_LR_TCA_INFO': str(acc_LR_TCA_INFO),
        'acc_LR_SA': acc_LR_SA,
        'acc_LR_SA_INFO': str(acc_LR_SA_INFO),
        'acc_DT_KMM': acc_DT_KMM,
        'acc_DT_KMM_INFO': str(acc_DT_KMM_INFO),
        'acc_DT_NN': acc_DT_NN,
        'acc_DT_NN_INFO': str(acc_DT_NN_INFO),
        'acc_DT_TCA': acc_DT_TCA,
        'acc_DT_TCA_INFO': str(acc_DT_TCA_INFO),
        'acc_DT_SA': acc_DT_SA,
        'acc_DT_SA_INFO': str(acc_DT_SA_INFO),
        'acc_NB_KMM': acc_NB_KMM,
        'acc_NB_KMM_INFO': str(acc_NB_KMM_INFO),
        'acc_NB_NN': acc_NB_NN,
        'acc_NB_NN_INFO': str(acc_NB_NN_INFO),
        'acc_NB_TCA': acc_NB_TCA,
        'acc_NB_TCA_INFO': str(acc_NB_TCA_INFO),
        'acc_NB_SA': acc_NB_SA,
        'acc_NB_SA_INFO': str(acc_NB_SA_INFO)
    }])
Пример #9
0
X_s_t = np.vstack([X_s, X_t_init])
y_s_t = np.concatenate([y_s, y_t_init])

clf_sup = RandomForestClassifier().fit(X_s_t, y_s_t)
pred = clf_sup.predict(X_test)
print(accuracy_score(y_test, pred))

clf_sup = MLPClassifier().fit(X_s_t, y_s_t)
pred = clf_sup.predict(X_test)
print(accuracy_score(y_test, pred))

clf_sup = DecisionTreeClassifier().fit(X_s_t, y_s_t)
pred = clf_sup.predict(X_test)
print(accuracy_score(y_test, pred))

clf_sup = SVC().fit(X_s_t, y_s_t)
pred = clf_sup.predict(X_test)
print(accuracy_score(y_test, pred))

# ENCO learning
clf_sup = encolearning.EnCoLearning(iteration=20).fit(X_s_t, y_s_t, X_t)
pred = clf_sup.predict(X_test)
print(accuracy_score(y_test, pred))

#%% TCA
clf_tca = SubspaceAlignedClassifier(num_components=10)
clf_tca.fit(X_s, y_s.reshape(len(y_s), ), X_t)

pred = clf_tca.predict(X_test)
acc = accuracy_score(y_test, pred)
print(acc)
Пример #10
0
def test_init():
    """Test for object type."""
    clf = SubspaceAlignedClassifier()
    assert type(clf) == SubspaceAlignedClassifier
    assert not clf.is_trained