示例#1
0
def check_cross_validation():
    """Smoke-check ``cross_val_score`` with an EasyMKL estimator.

    Runs a 3-fold cross validation on the kernel list ``KL`` and labels ``Y``
    (both taken from the enclosing scope) and asserts that exactly one score
    per fold is returned.
    """
    expected_folds = 3
    estimator = EasyMKL(lam=0.1, kernel='precomputed')
    scores = cross_val_score(KL, Y, estimator, n_folds=expected_folds)
    assert len(scores) == expected_folds
示例#2
0
def MKL():
    """Cross-validate EasyMKL over a grid of lambda and SVC ``C`` values.

    The experiment settings come from ``experiment_setting()``. For every
    metric pair the similarity matrices returned by ``get_s_metric`` are
    turned into kernels (``exp(s) / 0.01``) and normalised; the kernels from
    the last metric pair are then scored with 3-fold cross validation for
    each (lambda, C) combination. Everything is reported via ``print``;
    nothing is returned.
    """
    fname, pv, tv, _org_metrics = experiment_setting()
    print(fname, pv, tv)

    # Each entry is a pair of metric names handed to get_s_metric together.
    for metrics in [["l1", "l2"]]:
        _X, y, sim_matrices = get_s_metric(fname=fname,
                                           tv=tv,
                                           pv=pv,
                                           metrics=metrics)

        # Similarity -> kernel matrix, followed by kernel normalisation.
        KL = [np.exp(sim) / 0.01 for sim in sim_matrices]
        KL_norm = list(map(kernel_normalization, KL))
        print(KL_norm, sim_matrices)

    print(y)

    # Same iteration order as itertools.product(lam_values, C_values):
    # lambda varies slowest, C fastest.
    for lam in (0, 0.1, 0.2, 1):
        for C in (0.01, 1, 100):
            mkl = EasyMKL(lam=lam, learner=SVC(C=C))
            scores = cross_val_score(KL_norm,
                                     y,
                                     mkl,
                                     n_folds=3,
                                     scoring='mae')
            print(lam, C, scores)
示例#3
0
# Imports for this step. KOMD is not an MKL algorithm but a simple kernel
# machine like the SVM.
from MKLpy.algorithms import EasyMKL, KOMD
from MKLpy.model_selection import cross_val_score, cross_val_predict
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import numpy as np

print('tuning lambda for EasyMKL...', end='')
base_learner = SVC(C=10000)  # huge C: behaves like a hard-margin SVM
best_results = {}
# Candidate lambda values for the EasyMKL algorithm.
for lam in (0, 0.01, 0.1, 0.2, 0.9, 1):
    # cross_val_predict runs the cross validation itself and optimises the
    # accuracy; its counterpart cross_val_score optimises the roc_auc_score
    # (use score='roc_auc').
    # WARNING: these functions will change in the next version.
    candidate = EasyMKL(estimator=base_learner, lam=lam)
    scores = cross_val_predict(KLtr, Ytr, candidate, n_folds=5,
                               score='accuracy')
    acc = np.mean(scores)
    # Keep the first result, then only strictly better ones.
    if not best_results or acc > best_results['score']:
        best_results = {'lam': lam, 'score': acc}
print('done')

# Refit with the selected lambda and evaluate on the held-out kernels.
clf = EasyMKL(estimator=base_learner, lam=best_results['lam']).fit(KLtr, Ytr)
y_pred = clf.predict(KLte)
accuracy = accuracy_score(Yte, y_pred)
print('accuracy on the test set: %.3f, with lambda=%.2f' %
      (accuracy, best_results['lam']))
示例#4
0
# Imports for this step.
from MKLpy.algorithms import AverageMKL, EasyMKL
from sklearn.svm import SVC
from MKLpy.multiclass import OneVsRestMKLClassifier, OneVsOneMKLClassifier
from sklearn.metrics import accuracy_score, roc_auc_score
import numpy as np

# Homogeneous polynomial kernels for degrees 0..10: train-vs-train for
# fitting, test-vs-train for prediction.
degrees = range(11)
KLtr = [
    pairwise.homogeneous_polynomial_kernel(Xtr, degree=d) for d in degrees
]
KLte = [
    pairwise.homogeneous_polynomial_kernel(Xte, Xtr, degree=d)
    for d in degrees
]
print('done')

print('training EasyMKL with one-vs-all multiclass strategy...', end='')
base_learner = SVC(C=0.1)
clf = EasyMKL(lam=0.1, multiclass_strategy='ova', learner=base_learner)
clf = clf.fit(KLtr, Ytr)
print('done')

# One weight vector per entry of clf.solution.
print('the combination weights are:')
for sol in clf.solution:
    print('(%d vs all): ' % sol, clf.solution[sol].weights)

# Evaluate the solution on the test kernels.
y_pred = clf.predict(KLte)  # predicted labels
y_score = clf.decision_function(KLte)  # ranking scores
accuracy = accuracy_score(Yte, y_pred)
print('Accuracy score: %.3f' % (accuracy))

print('training EasyMKL with one-vs-one multiclass strategy...', end='')
示例#5
0
                                                                   Xtr,
                                                                   degree=d)
                            for d in range(4)
                        ]
                        print('done')
                        # ''' Compute RBF Kernels'''
                        # gamma_range = np.logspace(-9, 3, 13)
                        # ker_list = [rbf_kernel(Xtr, gamma=g) for g in gamma_range]

                        # and train 3 classifiers ###
                        clf = AverageMKL().fit(
                            KLtr, ytr)  # a wrapper for averaging kernels
                        # print(clf.weights)  # print the weights of the combination of base kernels
                        print('training EasyMKL...for polynomials and RBF')
                        clfEasy = EasyMKL(lam=0.1).fit(
                            KLtr, ytr
                        )  # combining kernels with the EasyMKL algorithm
                        # clfRBF = EasyMKL(lam=0.1).fit(ker_list, ytr)
                        print('------')
                        print('finished training')
                    except:
                        count_i += 1
                        print(count_i)
                        print(i, "hin failed here!")

                        continue

                else:
                    print('Shapes dont match.')
                    pass
                print('Average Kernel Testing')
# Imports for this step. KOMD is not an MKL algorithm but a simple kernel
# machine like the SVM.
from MKLpy.model_selection import train_test_split
from MKLpy.algorithms import EasyMKL, KOMD
from MKLpy.model_selection import cross_val_score, cross_val_predict
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import numpy as np

# One normalised monotone conjunctive kernel for each c in 0..4.
KL = [
    kernel_normalization(pairwise.monotone_conjunctive_kernel(Xbin, c=c))
    for c in range(5)
]
print('done')

# Train/test split (N.B. the kernel list itself is split directly).
KLtr, KLte, Ytr, Yte = train_test_split(KL, Y, test_size=.3, random_state=42)

print('tuning lambda for EasyMKL...', end='')
base_learner = SVC(C=10000)  # huge C: behaves like a hard-margin SVM
best_results = {}
# Candidate lambda values for the EasyMKL algorithm.
for lam in (0, 0.01, 0.1, 0.2, 0.9, 1):
    # cross_val_predict runs the cross validation itself and optimises the
    # accuracy; its counterpart cross_val_score optimises the roc_auc_score
    # (use score='roc_auc').
    # WARNING: these functions will change in the next version.
    candidate = EasyMKL(estimator=base_learner, lam=lam)
    scores = cross_val_predict(KLtr, Ytr, candidate, n_folds=5,
                               score='accuracy')
    acc = np.mean(scores)
    # Keep the first result, then only strictly better ones.
    if not best_results or acc > best_results['score']:
        best_results = {'lam': lam, 'score': acc}
print('done')

# Refit with the selected lambda and evaluate on the held-out kernels.
clf = EasyMKL(estimator=base_learner, lam=best_results['lam']).fit(KLtr, Ytr)
y_pred = clf.predict(KLte)
accuracy = accuracy_score(Yte, y_pred)
print('accuracy on the test set: %.3f, with lambda=%.2f' %
      (accuracy, best_results['lam']))
示例#7
0
def fitting_function_mkl(key):
    """Cross-validate EasyMKL on polynomial kernels for one label key.

    Reads the label CSV ``<key>.csv`` and the pickled HMM features registered
    under ``key``, joins them with derived market features, builds
    homogeneous polynomial kernels (degrees 1-10 plus an identity kernel) and
    runs a 5-fold cross validation for every (C, lambda) pair. After each
    pair the accumulated ``cv_dict_list`` is pickled to the
    ``ExperimentCommonLocs/MKLFittedModels`` folder under ``mainPath``.

    The function depends on names from the enclosing module: ``symbolData``,
    ``label_idx``, ``symbol``, ``hmm_date``, ``symbol_feature_paths``,
    ``cv_dict_list``, ``alternate_labels_nos`` and ``mainPath``.

    Args:
        key: string used as the label file stem and as the lookup key into
            ``symbol_feature_paths``.

    Returns:
        None. Results are stored in ``cv_dict_list`` and written to disk.
        The function returns early when the label file is missing, when all
        HMM features are null, or when fewer than 10 rows remain.
    """
    print('For key: ', key, '############')
    labels_file_path = os.path.join(
        symbolData.symbol_specific_label_path(label_idx), key + ".csv")
    labels_file_exists = os.path.isfile(labels_file_path)
    print(labels_file_exists)
    if not labels_file_exists:
        return

    print(" reading labels")
    labels = pd.read_csv(labels_file_path)
    # First column whose name contains 'label'.
    label_name = str(
        labels.columns[labels.columns.str.contains(pat='label')].values[0])
    logmemoryusage("Before garbage collect")
    hmm_features = nfu.hmm_features_df(
        open_pickle_filepath(symbol_feature_paths[key]))

    if hmm_features.isnull().values.all():
        print('lots of NaNs on features')
        return

    print("can train")
    # Each step wraps the previous frame and adds one family of market
    # features; the order mirrors the original nested call chain.
    with_duration = CreateMarketFeatures(df=labels).ma_spread_duration()
    with_spread = CreateMarketFeatures(df=with_duration).ma_spread()
    with_chaikin = CreateMarketFeatures(with_spread).chaikin_mf()
    market_features_df = CreateMarketFeatures(with_chaikin).obv_calc()

    # sort must be the boolean False: the string 'False' is truthy and is
    # rejected outright by recent pandas versions.
    df_concat = pd.DataFrame(
        pd.concat([hmm_features, market_features_df], axis=1,
                  sort=False).dropna())

    df = df_concat[df_concat[label_name].notna()]
    df_final = df.drop(columns=[
        'TradedPrice', 'Duration', 'TradedTime', 'ReturnTradedPrice',
        'Volume', label_name
    ])
    # Training labels: every column whose name contains 'label'.
    y_train = df.reindex(
        columns=df.columns[df.columns.str.contains(pat='label')])
    print('go to the labels')

    if df_final.shape[0] < 10:
        print(
            ' the ratio of classes is too low. try another label permutation'
        )
        return

    print("starting model fit")
    Xtr, Xte, Ytr, Yte = train_test_split(df_final,
                                          y_train,
                                          test_size=.2,
                                          random_state=42)
    # Training tensors.
    X_tr = normalization(rescale_01(np.array(Xtr)))
    Y_tr = torch.Tensor(Ytr.values.ravel())

    # Test tensors. NOTE(review): Y_te and KLte are built but not consumed
    # in this function; kept so the preprocessing still runs as before.
    X_te = normalization(rescale_01(np.array(Xte)))
    Y_te = torch.Tensor(Yte.values.ravel())

    degrees = range(1, 11)
    KLtr = [
        pairwise.homogeneous_polynomial_kernel(X_tr, degree=d)
        for d in degrees
    ] + [identity_kernel(len(Y_tr))]
    KLte = [
        pairwise.homogeneous_polynomial_kernel(X_te, X_tr, degree=d)
        for d in degrees
    ]
    # Test-side counterpart of the identity kernel: an all-zero matrix.
    KLte.append(torch.zeros(KLte[0].size()))
    print('done with kernel')

    try:
        lam_values = [0.1, 0.2, 1]
        C_range = [0.1, 1]
        best_results = {}
        for C_ch in C_range:
            print(' fitted the base learner')
            # Possible lambda values for the EasyMKL algorithm.
            for lam in lam_values:
                print('now here', lam)
                print(' and tuning lambda for EasyMKL...', end='')
                base_learner = SVC(C=C_ch)  # "soft"-margin svm
                # cross_val_score performs the cross validation itself; it
                # may return accuracy, auc, or F1 scores.
                scores = cross_val_score(KLtr,
                                         Y_tr,
                                         EasyMKL(learner=base_learner,
                                                 lam=lam),
                                         n_folds=5,
                                         scoring='accuracy')
                acc = np.mean(scores)
                if not best_results or best_results['score'] < acc:
                    # Record C too: the best score is tracked across both
                    # hyper-parameters.
                    best_results = {'lam': lam, 'C': C_ch, 'score': acc}

                print('done', best_results)
                cv_dict_list[(symbol, hmm_date,
                              label_idx)][(lam, C_ch)] = [scores, best_results]
                print(cv_dict_list)

                pickle_out_filename = os.path.join(
                    mainPath, "ExperimentCommonLocs/MKLFittedModels",
                    "_".join((symbol, 'model_fit_date', str(key),
                              str(alternate_labels_nos[label_idx]),
                              'MultiKernelSVC.pkl')))
                print(pickle_out_filename)

                # Context manager closes the handle even if dump() raises.
                with open(pickle_out_filename, 'wb') as pickle_out:
                    pickle.dump(cv_dict_list, pickle_out)

    except (ValueError, TypeError, EOFError) as err:
        # Best-effort: one failing key must not stop the others, but report
        # the failure instead of hiding it.
        print('cross-validation failed for key', key, ':', repr(err))
示例#8
0
                    K_list_tr[counter, :, :] = my_kernel(Xtr, Xtr, jcount)
                    counter += 1

                K_list_tr_te = np.zeros(
                    [Number_of_widths, Test_size, Training_size])
                counter = 0

                for jcount in np.arange(Min_Width, Max_Width,
                                        (Max_Width - Min_Width) /
                                        Number_of_widths):

                    K_list_tr_te[counter, :, :] = my_kernel(Xte, Xtr, jcount)
                    counter += 1

                ax = EasyMKL(lam=0.1, kernel='precomputed')
                ker_matrix_tr = ax.arrange_kernel(K_list_tr, Ytr)

                kernel_weights = ax.weights
                kernel_weights = np.reshape(kernel_weights, [-1, 1, 1])
                K_tr = np.multiply(kernel_weights, K_list_tr_te)
                K_tr = np.sum(K_tr, axis=0)

                clf = SVC(C=2, kernel='precomputed').fit(ker_matrix_tr, Ytr)
                predictions = clf.predict(K_tr)
                predictions_storer[icount, :] = predictions
                #print(icount)
                ##
                v = predictions == Yte.T
                v.astype(np.float)
                c = np.sum(v, axis=1)
示例#9
0
                nalsvm.logmemoryusage("Before garbage collect")
                Xtr = normalization(rescale_01(torch.Tensor(pkl_file[date][0].values)))
                Ytr = torch.Tensor(pkl_file[date][1].values)
                print('first bit done')
            nalsvm.gc.collect()
                KLrbf = generators.RBF_generator(Xtr, gamma=[.001, .01, .1])
                print('done with kernel')
                nalsvm.gc.collect()
                try:
                    lam_values = [0, 0.1, 0.2, 1]
                    C_values = [0.01, 1, 10, 100]
                    print(C_values)
                    for lam, C in product(lam_values, C_values):
                        print('now here', C, lam)
                        svm = SVC(C=C)
                        mkl = EasyMKL(lam=lam, learner=svm)
                        scores = cross_val_score(KLrbf, Ytr, mkl, n_folds=3, scoring='accuracy')
                        print(str(scores))
                        print(lam, C, scores)
                        print(type(scores))
                        cv_dict_list[(symbol, date, alternate_label)][(lam, C)] = scores
                        nalsvm.logmemoryusage("Before garbage collect")
                        print('---------------> moving on')

                except (ValueError, TypeError, EOFError):
                    continue
                # only way that seems to work for this
                pickle_out_filename = os.path.join(cross_validation_data_location,
                                                   "_".join((symbol, date, 'RBF_CrossValidationResults.pkl')))
                test_df = pd.DataFrame.from_dict(cv_dict_list)
                test_df.to_pickle(pickle_out_filename)
示例#10
0
# Imports for this step. KOMD is not an MKL algorithm but a simple kernel
# machine like the SVM.
from MKLpy.model_selection import train_test_split
from MKLpy.algorithms import EasyMKL, KOMD
from MKLpy.model_selection import cross_val_score, cross_val_predict
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import numpy as np

# One normalised monotone conjunctive kernel for each c in 0..4.
KL = [
    kernel_normalization(pairwise.monotone_conjunctive_kernel(Xbin, c=c))
    for c in range(5)
]
print('done')

# Train/test split (N.B. the kernel list itself is split directly).
KLtr, KLte, Ytr, Yte = train_test_split(KL, Y, test_size=.3, random_state=42)

print('tuning lambda for EasyMKL...', end='')
base_learner = SVC(C=10000)  # huge C: behaves like a hard-margin SVM
best_results = {}
# Candidate lambda values for the EasyMKL algorithm.
for lam in (0, 0.01, 0.1, 0.2, 0.9, 1):
    # cross_val_predict runs the cross validation itself and optimises the
    # accuracy; its counterpart cross_val_score optimises the roc_auc_score
    # (use score='roc_auc').
    # WARNING: these functions will change in the next version.
    candidate = EasyMKL(learner=base_learner, lam=lam)
    scores = cross_val_predict(KLtr, Ytr, candidate, n_folds=5,
                               score='accuracy')
    acc = np.mean(scores)
    # Keep the first result, then only strictly better ones.
    if not best_results or acc > best_results['score']:
        best_results = {'lam': lam, 'score': acc}
print('done')

# Refit with the selected lambda and evaluate on the held-out kernels.
clf = EasyMKL(learner=base_learner, lam=best_results['lam']).fit(KLtr, Ytr)
y_pred = clf.predict(KLte)
accuracy = accuracy_score(Yte, y_pred)
print('accuracy on the test set: %.3f, with lambda=%.2f' %
      (accuracy, best_results['lam']))
        # 对测试数据进行处理

        kernel_functions = [
            k_helpers.create_histogram_kernel,
            k_helpers.create_histogram_kernel,
            # k_helpers.create_rbf_kernel(final_gamma),
            k_helpers.create_exponential_kernel(gamma),
        ]
        n_test = GLCM_X_test.shape[0]
        n_train = GLCM_X_train.shape[0]
        kernel_test_matrices = []
        GLCM_test_matrics = np.empty((n_test, n_train))
        FD_test_matrics = np.empty((n_test, n_train))
        Harris_test_matrics = np.empty((n_test, n_train))
        for i in range(n_test):
            for j in range(n_train):
                GLCM_test_matrics[i][j] = kernel_functions[0](GLCM_X_test[i], GLCM_X_train[j])
                FD_test_matrics[i][j] = kernel_functions[1](FD_X_test[i], FD_X_train[j])
                Harris_test_matrics[i][j] = kernel_functions[2](Harris_X_test[i], Harris_X_train[j])
        kernel_test_matrices.append(GLCM_test_matrics)
        kernel_test_matrices.append(FD_test_matrics)
        kernel_test_matrices.append(Harris_test_matrics)

        final_test_data = k_helpers.get_combined_kernel(kernel_test_matrices, weights)

        MKL_kernel = EasyMKL(estimator=SVC(C=1)).arrange_kernel(final_train_data, y_train)
        clf_svc = SVC(C=1, kernel='precomputed')
        clf_svc.fit(MKL_kernel, y_train)
        score_SVC += clf_svc.score(final_test_data, y_test)
        print('一次循环的精度为%s' % (clf_svc.score(final_test_data, y_test)))
    print('SVC最后的分类精度:%s' % (score_SVC / 10))
示例#12
0
                else:
                    try:
                        X_train = MinMaxScaler().fit_transform(df_final)
                        nalsvm.logmemoryusage("After feature creation")
                        if X_train.shape[0] == y_labels_train.shape[0]:
                            nalsvm.logmemoryusage("Before starting training")
                            print('Shapes Match- starting training ')
                            # polynomial Kernels ##
                            try:
                                KLtr = [pairwise.homogeneous_polynomial_kernel(X_train, degree=d) for d in range(4)]
                                # KLte = [pairwise.homogeneous_polynomial_kernel(Xte, Xtr, degree=d) for d in range(4)]
                                print('done')
                                clf = AverageMKL().fit(KLtr, y_labels_train)  # a wrapper for averaging kernels
                                # print(clf.weights)  # print the weights of the combination of base kernels
                                print('training EasyMKL...for polynomials and RBF')
                                clfEasy = EasyMKL(lam=0.1).fit(KLtr,
                                                               y_labels_train)  # combining kernels with the EasyMKL algorithm
                                print('------')
                                print('finished training')
                                # somewhere here you need to do out of sample testing and then store all that
                                symbolForwardDates = data_cls.forwardDates(joint_keys, joint_keys[joint_key_idx])
                                oos_svc_predictions = defaultdict(dict)
                                # alias to store the data : symbol, joint Date, Label Used
                                results_predict_alias = "_".join((symbol, joint_keys[joint_key_idx, nalsvm.labels_pickle_files[alternate_label_idx]))
                                for forward_date_idx, forward_date in enumerate(symbolForwardDates):
                                    features_oos, labels_oos = nalsvm.ticker_features_labels(nalsvm.jointLocationsDictionary[symbolForwardDates[forward_date_idx]])
                                if nalsvm.hmm_features_df(features_oos).isnull().values.all():
                                    print('Problem')
                                    ## need to get all the data out for
                                    KLte = [pairwise.homogeneous_polynomial_kernel(Xte, X_train, degree=d) for d in range(4)]
                                print('done')
示例#13
0
                    rescale_01(torch.Tensor(
                        pkl_file[model_date][0].values)))  # fitting model
                # put the labels in a tensor format
                Ytr = torch.Tensor(pkl_file[model_date][1].values)
                print('first bit done')
                # force garbage collect
                nalsvm.gc.collect()
                # kernels
                KLrbf = generators.RBF_generator(Xtr, gamma=[.001, .01, .1])
                # dont need the next bit
                print('done with kernel')
                print(forward_dates)
                # base learner- use c =1 or 10
                # the c and lambda values need to be picked up by the cross-val results !
                base_learner = SVC(C=10)

                clf = EasyMKL(lam=0.2,
                              multiclass_strategy='ova',
                              learner=base_learner).fit(KLrbf, Ytr)
                # try ovo as
                # well
                mkl_avg = AverageMKL().fit(KLrbf, Ytr)
                print('done')
                print('the combination weights are:')
                # this bit may be redundant here and we can put it somewhere else
                for sol in clf.solution:
                    print(
                        '(%d vs all): ' % sol, clf.solution[sol].weights
                    )  #dont need this loop- can make it redundant in another file
            except:
                continue
示例#14
0
# Metrics used when evaluating the fitted model.
from sklearn.metrics import accuracy_score, roc_auc_score

# ---------------------------------------------------------------------------
# Lambda selection for EasyMKL with a grid-searched SVC as base learner
# ---------------------------------------------------------------------------
best_results = {}

for lam in (0, 0.0001, 0.0009, 0.001, 0.009, 0.01, 0.09, 0.1, 0.2, 0.9, 1):
    # A fresh grid search object is built for every lambda; the one from the
    # last iteration is reused for the final fit below.
    base_learner = GridSearchCV(
        svm.SVC(probability=True),
        param_grid=param_grid,
        cv=cv,
        refit='AUC',
        error_score=0,
        pre_dispatch='1*n_jobs',
        n_jobs=1,
    )
    candidate = EasyMKL(learner=base_learner, lam=lam)
    scores = cross_val_score(k1, y_train_A, candidate, cv=cv, n_folds=5,
                             scoring='accuracy')
    acc = np.mean(scores)
    # Keep the first result, then only strictly better ones.
    if not best_results or acc > best_results['score']:
        best_results = {'lam': lam, 'score': acc}

# ---------------------------------------------------------------------------
# Final EasyMKL model with the selected lambda
# ---------------------------------------------------------------------------
clf = EasyMKL(learner=base_learner, lam=best_results['lam']).fit(k1, y_train_A)
print(clf)
示例#15
0
    def parallelised_function(file):
        """Tune and fit an RBF EasyMKL model for every date stored in one file.

        ``file`` is joined onto ``jointFeatureLocation`` and unpickled into a
        nested mapping indexed first by HMM date and then by feature/label
        date; each leaf is indexable, with item 0 the features path and item
        1 the labels path. For every leaf whose features file exists, the
        HMM and market features are assembled, RBF kernels are generated, C
        and lambda are selected by 5-fold cross validation, and the best
        model is refit and scored on the same training data. Progress is
        reported via ``print``.

        Args:
            file: file name inside ``jointFeatureLocation``. The symbol is
                the text before the first "_"; tokens 6 and 9 of the full
                path split on "_" are used as feature/label date and label
                index (NOTE(review): this depends on the directory part of
                the path containing a fixed number of underscores -- confirm).

        Returns:
            None.
        """
        select_file_path = os.path.join(jointFeatureLocation, file)
        symbol = file.split("_")[0]
        print('Symbol:----->', symbol)

        path_tokens = select_file_path.split("_")
        select_feature_label_date = path_tokens[6]
        select_label_idx = path_tokens[9]

        unpickled_select_file = open_pickle_filepath(select_file_path)

        C_range = [0.1, 1]
        lam_range = [0.2]

        for hmm_date_key in sorted(unpickled_select_file.keys()):
            # Each key here unlocks one feature and label set.
            dated_paths = unpickled_select_file[hmm_date_key]

            for feature_label_date in sorted(dated_paths.keys()):
                features_file_path = dated_paths[feature_label_date][0]
                labels_file_path = dated_paths[feature_label_date][1]

                if not os.path.isfile(features_file_path):
                    print('PROBLEM----->in one of of your locations')
                    continue

                print('ok----->', feature_label_date)
                labels = pd.read_csv(labels_file_path)
                # First column whose name contains 'label'.
                label_name = str(labels.columns[labels.columns.str.contains(
                    pat='label')].values[0])
                features = open_pickle_filepath(features_file_path)
                hmm_features = nfu.hmm_features_df(features)
                print('loaded features and labels ')

                if hmm_features.isnull().values.all():
                    # Nothing usable in the HMM features for this date.
                    continue

                # Each step wraps the previous frame and adds one family of
                # market features; order mirrors the original call chain.
                with_duration = CreateMarketFeatures(
                    df=labels).ma_spread_duration()
                with_spread = CreateMarketFeatures(
                    df=with_duration).ma_spread()
                with_chaikin = CreateMarketFeatures(with_spread).chaikin_mf()
                market_features_df = CreateMarketFeatures(
                    with_chaikin).obv_calc()

                # sort must be the boolean False: the string 'False' is
                # truthy and is rejected outright by recent pandas versions.
                df_concat = pd.DataFrame(
                    pd.concat([hmm_features, market_features_df],
                              axis=1,
                              sort=False).dropna())

                df = df_concat[df_concat[label_name].notna()]
                df_final = df.drop(columns=[
                    'TradedPrice', 'Duration', 'TradedTime',
                    'ReturnTradedPrice', 'Volume', label_name
                ])
                # Training labels: first column whose name contains 'label'.
                y_train = df[df.columns[df.columns.str.contains(
                    pat='label')]].iloc[:, 0]

                if df_final.shape[0] < 10:
                    print(
                        ' the ratio of classes is too low. try another label permutation'
                    )
                    continue

                print("starting model fit")
                X = np.asarray(df_final.values)
                Xtr = normalization(rescale_01(torch.Tensor(X)))
                Ytr = torch.Tensor(y_train.values)
                print('-----------------first bit done------------------')
                KLrbf = generators.RBF_generator(Xtr,
                                                 gamma=[.01, .1, .25, .5])
                print('done with kernel')

                best_results = {}
                try:
                    for C_choice in C_range:
                        base_learner = SVC(C=C_choice)
                        # Possible lambda values for the EasyMKL algorithm.
                        for lam in lam_range:
                            # cross_val_score performs the cross validation
                            # itself; it may return accuracy, auc, or F1.
                            scores = cross_val_score(
                                KLrbf,
                                Ytr,
                                EasyMKL(learner=base_learner, lam=lam),
                                n_folds=5,
                                scoring='accuracy')
                            acc = np.mean(scores)
                            if (not best_results
                                    or best_results['score'] < acc):
                                best_results = {
                                    'C': C_choice,
                                    'lam': lam,
                                    'score': acc,
                                    'scores': scores
                                }
                    print('done')

                    best_learner = SVC(C=best_results['C'])
                    clf = EasyMKL(learner=best_learner,
                                  lam=best_results['lam']).fit(KLrbf, Ytr)
                    # Predictions are made on the training kernels, so this
                    # is a training-set accuracy, not a held-out one.
                    y_pred = clf.predict(KLrbf)
                    accuracy = accuracy_score(Ytr, y_pred)
                    print(
                        'accuracy on the training set: %.3f, with lambda=%.2f'
                        % (accuracy, best_results['lam']))
                    # Scores of the last (C, lambda) pair evaluated.
                    print(scores)

                    # TODO: best_results is not persisted yet; the target
                    # path is built here but the dump is still disabled.
                    pickle_out_filename = os.path.join(
                        mainPath,
                        "ExperimentCommonLocs/CrossValidationResults",
                        "_".join((symbol, 'feature_label_date',
                                  str(select_feature_label_date),
                                  str(select_label_idx), 'hmm_date:',
                                  hmm_date_key, 'RBF',
                                  'MultiKernelSVC.pkl')))

                except ValueError as err:
                    # Best-effort: report the failure and move on to the
                    # next feature/label date instead of hiding it.
                    print('model fit failed for', feature_label_date, ':',
                          repr(err))
                    continue
示例#16
0
def Learning_curve_using_weather_data(
        features_path=r"D:\CVProject\CBAM-keras-master\handcraft\features_with_pca.mat",
        results_path=r"D:\CVProject\CBAM-keras-master\handcraft\results\learning_curve_results_0202_01.txt",
        fractions=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)):
    """Compute a learning curve for EasyMKL on the weather feature data.

    For every value in ``fractions`` the leading
    ``int(fraction * n_samples)`` rows are passed to
    ``bulid_kernel_transform``, a one-vs-all EasyMKL model is fitted on the
    returned training kernels, and the train/test accuracies are printed and
    written to the results file.

    Original author's status note: PASS, 2021.02.05.

    Args:
        features_path: MATLAB ``.mat`` file whose ``'array'`` entry holds one
            sample per row; the last column is used as the label and every
            other column as a feature.
        results_path: Text file receiving one line per fraction. It is opened
            in write mode, so an existing file is overwritten.
        fractions: Portions of the data set (in file order) to learn from.

    Returns:
        None. Results are reported on stdout and in ``results_path``.
    """
    # ``io``, ``accuracy_score`` and ``bulid_kernel_transform`` are not
    # imported here; they are resolved from module scope.
    from MKLpy.algorithms import EasyMKL
    from MKLpy.preprocessing import normalization, rescale_01
    from sklearn.svm import SVC

    print('loading dataset...', end='')
    data = io.loadmat(features_path)['array']
    X, Y = data[:, :-1], data[:, -1]
    print('done')

    print('preprocessing data...', end='')
    X = rescale_01(X)  # feature scaling in [0,1]
    X = normalization(X)  # ||X_i||_2^2 = 1
    print('done')

    print("Build a base learner")
    # Soft-margin SVC. An earlier revision used C=10000 to approximate a
    # hard margin; C=20 is the value currently in use.
    base_learner = SVC(C=20)

    print("Build EasyMKL classifier")

    # The context manager guarantees the report is flushed and the handle is
    # released even if one of the fits below raises.
    with open(results_path, "w") as results_file:
        for fraction in fractions:
            learn_count = int(fraction * X.shape[0])
            # The helper (name spelled as defined elsewhere) yields, in this
            # order: train kernels, train labels, test kernels, test labels.
            KLtr, KYtr, KLte, KYte = bulid_kernel_transform(
                X[:learn_count], Y[:learn_count])

            clf = EasyMKL(lam=0.1, multiclass_strategy='ova',
                          learner=base_learner).fit(KLtr, KYtr)

            train_set_accuracy = accuracy_score(KYtr, clf.predict(KLtr))
            test_set_accuracy = accuracy_score(KYte, clf.predict(KLte))

            # Build the report line once and send it to both destinations.
            report = (
                "Test on {0} train samples and {1} test samples,".format(
                    len(KYtr), len(KYte)) +
                'accuracy on the train set: %.3f and accuracy on the test set : %.3f'
                % (train_set_accuracy, test_set_accuracy))
            print(report)
            print(report, file=results_file)

    print('done')
示例#17
0
]
# Homogeneous polynomial kernels evaluated between Xte and Xtr, one kernel
# per degree d = 0, 1, ..., 10.
KLte = [
    pairwise.homogeneous_polynomial_kernel(Xte, Xtr, degree=d)
    for d in range(11)
]
print('done')

# MKL algorithms
from MKLpy.algorithms import AverageMKL, EasyMKL, KOMD  # KOMD is not an MKL algorithm but a simple kernel machine like the SVM
print('training AverageMKL...', end='')
clf = AverageMKL().fit(KLtr, Ytr)  # a wrapper for averaging kernels
print('done')
K_average = clf.solution.ker_matrix  # the combined kernel matrix

print('training EasyMKL...', end='')
clf = EasyMKL(lam=0.1).fit(KLtr,
                           Ytr)  # combining kernels with the EasyMKL algorithm
# lam is a hyper-parameter in [0,1]
# NOTE: `clf` is rebound here, so the AverageMKL model is no longer reachable
# through it; only `K_average` keeps its combined kernel.
print('done')
print('the combination weights are:')
print(clf.solution.weights)

# Evaluate the EasyMKL solution on the test kernels.
from sklearn.metrics import accuracy_score, roc_auc_score
y_pred = clf.predict(KLte)  # predicted labels
y_score = clf.decision_function(KLte)  # real-valued scores, fed to ROC AUC below
accuracy = accuracy_score(Yte, y_pred)
roc_auc = roc_auc_score(Yte, y_score)
print('Accuracy score: %.3f, roc AUC score: %.3f' % (accuracy, roc_auc))

# Select the base learner.
# MKL algorithms use a hard-margin SVM as base learner (or KOMD in the case of EasyMKL).
示例#18
0
def MultiView_learning(
        features_path=r"D:\CVProject\CBAM-keras-master\handcraft\features_with_pca_file_0202.mat"):
    """Train and evaluate multiclass EasyMKL on per-feature-group RBF kernels.

    The feature matrix is split column-wise into ten feature groups ("views":
    time, color, Gabor, lbp, cloud, haze, contrast, shadow, snow, pca). One
    RBF kernel is built per view, and EasyMKL is fitted twice on the kernel
    list, once with the one-vs-all and once with the one-vs-one multiclass
    strategy. For each model the learned kernel weights and the test
    accuracy, macro recall and confusion matrix are printed.

    Args:
        features_path: MATLAB ``.mat`` file whose ``'array'`` entry holds one
            sample per row. The last column is the label; the last two
            columns are excluded from the features.

    Returns:
        None. All results are printed to stdout.
    """
    # ``io`` and ``numpy`` are resolved from module scope.
    from MKLpy.algorithms import EasyMKL
    from MKLpy.generators import Multiview_generator
    from MKLpy.metrics import pairwise
    from MKLpy.preprocessing import normalization, rescale_01
    from sklearn.metrics import accuracy_score, confusion_matrix, recall_score
    from sklearn.model_selection import train_test_split
    from sklearn.svm import SVC

    # Column range of every view inside the preprocessed feature matrix.
    view_slices = (
        slice(None, 2),  # time
        slice(2, 92),  # color
        slice(92, 124),  # Gabor
        slice(124, 156),  # lbp
        slice(156, 348),  # cloud
        slice(348, 432),  # haze
        slice(432, 603),  # contrast
        slice(603, 606),  # shadow
        slice(606, 608),  # snow
        slice(608, None),  # pca
    )

    def split_views(samples):
        """Return one array per view, in ``view_slices`` order."""
        return [numpy.array(samples[:, columns]) for columns in view_slices]

    def report_test_metrics(model, kernels, labels):
        """Print accuracy, macro recall and confusion matrix for ``model``."""
        predicted = model.predict(kernels)
        print('Accuracy score: %.4f' % (accuracy_score(labels, predicted)))
        print('Recall score: %.4f' %
              (recall_score(labels, predicted, average='macro')))
        print('Confusion matrix', confusion_matrix(labels, predicted))

    print('loading dataset...', end='')
    data = io.loadmat(features_path)['array']
    # NOTE(review): the second-to-last column is dropped from the features
    # as well as the label column - confirm this is intended.
    X, Y = data[:, :-2], data[:, -1]
    print('done')

    print('preprocessing data...', end='')
    X = rescale_01(X)  # feature scaling in [0,1]
    X = normalization(X)  # ||X_i||_2^2 = 1

    Xtr, Xte, Ytr, Yte = train_test_split(X,
                                          Y,
                                          test_size=.1,
                                          random_state=42,
                                          shuffle=True)

    print(numpy.array(Xtr).shape)
    print(numpy.array(Ytr).shape)

    print('done')
    print('Training on {0} samples, Testing on {1} samples'.format(
        len(Xtr), len(Xte)))

    print('computing RBF Kernels...', end='')
    views_tr = split_views(Xtr)
    views_te = split_views(Xte)
    KLtr = Multiview_generator(views_tr, kernel=pairwise.rbf_kernel)
    # Test kernels are computed between test and training samples.
    KLte = Multiview_generator(views_te, views_tr, kernel=pairwise.rbf_kernel)
    print('done')

    base_learner = SVC(C=8)

    print('training EasyMKL with one-vs-all multiclass strategy...', end='')
    clf = EasyMKL(lam=0.1, multiclass_strategy='ova',
                  learner=base_learner).fit(KLtr, Ytr)
    print('done')
    print('the combination weights are:')
    for sol in clf.solution:
        print('(%d vs all): ' % sol, clf.solution[sol].weights)
    # Only predicted labels are needed below; the decision scores that were
    # previously computed here were never used.
    report_test_metrics(clf, KLte, Yte)

    print('training EasyMKL with one-vs-one multiclass strategy...', end='')
    clf = EasyMKL(lam=0.1, multiclass_strategy='ovo',
                  learner=base_learner).fit(KLtr, Ytr)
    print('done')
    print('the combination weights are:')
    for sol in clf.solution:
        print('(%d vs %d): ' % (sol[0], sol[1]), clf.solution[sol].weights)
    report_test_metrics(clf, KLte, Yte)
示例#19
0
# print(base_learner)
###########################################################################################
# Model selection: pick the EasyMKL `lam` with the best mean cross-validated
# accuracy. `best_results` stays empty until the first candidate is scored.
best_results = {}

for lam in [0, 0.0001, 0.0009, 0.001, 0.009, 0.01, 0.09, 0.1, 0.2, 0.9, 1]:
    # NOTE(review): this construction does not depend on `lam`, yet it is
    # repeated on every iteration; the instance from the last iteration is
    # the one reused for the final fit below.
    # NOTE(review): refit='AUC' names a scorer, but no `scoring` argument is
    # passed here - confirm this is what GridSearchCV expects.
    base_learner = GridSearchCV(svm.SVC(probability=True),
                                param_grid=param_grid,
                                cv=cv,
                                refit='AUC',
                                error_score=0,
                                pre_dispatch='1*n_jobs',
                                n_jobs=1)
    # NOTE(review): `lam` is tuned on `k1` only, while the final model below
    # is fitted on k1 + k2 + ... + k6 - confirm the mismatch is intended.
    scores = cross_val_score(k1,
                             y_tr_A,
                             EasyMKL(learner=base_learner, lam=lam),
                             cv=cv,
                             n_folds=5,
                             scoring='accuracy')
    # print(lam, scores)
    acc = np.mean(scores)
    # Keep the first candidate, then replace it only on a strictly better
    # mean accuracy (ties keep the earlier, smaller `lam`).
    if not best_results or best_results['score'] < acc:
        best_results = {'lam': lam, 'score': acc}

# EasyMKL-BASED
#############################################################################################
# Final fit with the selected `lam` on the combination of all six kernel
# groups (presumably lists of kernel matrices, joined with `+` - verify).
clf = EasyMKL(learner=base_learner,
              lam=best_results['lam']).fit(k1 + k2 + k3 + k4 + k5 + k6, y_tr_A)
print(clf)
#############################################################################################
# evaluate the solution
示例#20
0
import numpy as np

# Load the iris data set; `load_iris` is expected to be imported earlier in
# the file (not visible in this snippet).
ds = load_iris()
X, Y = ds.data, ds.target
classes = np.unique(Y)
print('done [%d classes]' % len(classes))
'''
WARNING: be sure that your matrix is not sparse! EXAMPLE:
from sklearn.datasets import load_svmlight_file
X,Y = load_svmlight_file(...)
X = X.toarray()
'''

# Compute homogeneous polynomial kernels with degrees 1, 2 and 3.
print('computing Homogeneous Polynomial Kernels...', end='')
from MKLpy.metrics import pairwise

KL = [pairwise.homogeneous_polynomial_kernel(X, degree=d) for d in range(1, 4)]
print('done')

# MKL algorithms
from MKLpy.algorithms import EasyMKL

print('training EasyMKL...', end='')
clf = EasyMKL(lam=0.1, multiclass_strategy='ovo').fit(
    KL, Y)  # combining kernels with the EasyMKL algorithm
# multiclass_strategy should be 'ovo' for the one-vs-one decomposition strategy, and 'ova' for the one-vs-all/rest strategy
print('done')

# NOTE(review): confirm that `weights` is exposed directly on the fitted
# estimator for the 'ovo' strategy in the installed MKLpy version.
print(clf.weights)