def check_cross_validation():
    """Smoke test: 3-fold cross validation of EasyMKL returns one score per fold.

    Uses the kernel list ``KL`` and the labels ``Y`` from the enclosing module.
    """
    expected_folds = 3
    fold_scores = cross_val_score(
        KL, Y, EasyMKL(lam=0.1, kernel='precomputed'), n_folds=expected_folds)
    assert len(fold_scores) == expected_folds
def MKL():
    """Grid-search EasyMKL over (lam, C) on kernels derived from similarity matrices.

    The experiment settings come from ``experiment_setting()``. For each pair
    of metrics the similarity matrices returned by ``get_s_metric`` are mapped
    to kernels (``exp(s) / 0.01``), normalised, and scored with 3-fold cross
    validation for every (lam, C) combination; each result is printed.
    """
    fname, pv, tv, org_metrics = experiment_setting()
    print(fname, pv, tv)

    lam_grid = [0, 0.1, 0.2, 1]
    c_grid = [0.01, 1, 100]

    for metrics in [["l1", "l2"]]:
        X, y, sim_matrices = get_s_metric(
            fname=fname, tv=tv, pv=pv, metrics=metrics)

        # from similarity to (normalised) kernel matrix
        KL_norm = [
            kernel_normalization(np.exp(sim) / 0.01) for sim in sim_matrices
        ]
        print(KL_norm, sim_matrices)
        print(y)

        for lam, C in product(lam_grid, c_grid):
            # each combination gets its own SVC base learner
            mkl = EasyMKL(lam=lam, learner=SVC(C=C))
            scores = cross_val_score(KL_norm, y, mkl, n_folds=3, scoring='mae')
            print(lam, C, scores)
# MKL algorithms
# (KOMD is not an MKL algorithm but a simple kernel machine, like the SVM)
from MKLpy.algorithms import EasyMKL, KOMD
from MKLpy.model_selection import cross_val_score, cross_val_predict
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
import numpy as np

# --- model selection: keep the lambda with the best cross-validated score ---
print('tuning lambda for EasyMKL...', end='')
base_learner = SVC(C=10000)  # very large C: close to a hard-margin SVM
lam_candidates = [0, 0.01, 0.1, 0.2, 0.9, 1]  # lambda values tried for EasyMKL
best_results = {}
for lam in lam_candidates:
    # cross_val_predict runs the 5-fold cross validation itself and is asked
    # for accuracy here. The original author warns that these MKLpy helpers
    # are expected to change in a later version.
    candidate = EasyMKL(estimator=base_learner, lam=lam)
    scores = cross_val_predict(KLtr, Ytr, candidate, n_folds=5, score='accuracy')
    acc = np.mean(scores)
    if not best_results or best_results['score'] < acc:
        best_results = {'lam': lam, 'score': acc}
print('done')

# --- evaluation on the test set with the selected lambda ---
clf = EasyMKL(estimator=base_learner, lam=best_results['lam']).fit(KLtr, Ytr)
y_pred = clf.predict(KLte)
accuracy = accuracy_score(Yte, y_pred)
print('accuracy on the test set: %.3f, with lambda=%.2f' % (accuracy, best_results['lam']))
# Imports used by this part of the script.
from MKLpy.algorithms import AverageMKL, EasyMKL
from MKLpy.multiclass import OneVsRestMKLClassifier, OneVsOneMKLClassifier
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.svm import SVC
import numpy as np

# Homogeneous polynomial kernels of degree 0..10: train/train and test/train.
poly_degrees = range(11)
KLtr = [pairwise.homogeneous_polynomial_kernel(Xtr, degree=d) for d in poly_degrees]
KLte = [pairwise.homogeneous_polynomial_kernel(Xte, Xtr, degree=d) for d in poly_degrees]
print('done')

# --- EasyMKL, one-vs-all decomposition ---
print('training EasyMKL with one-vs-all multiclass strategy...', end='')
base_learner = SVC(C=0.1)
ova_mkl = EasyMKL(lam=0.1, multiclass_strategy='ova', learner=base_learner)
clf = ova_mkl.fit(KLtr, Ytr)
print('done')

print('the combination weights are:')
for sol in clf.solution:
    # clf.solution is indexed by the positive class of each binary task
    print('(%d vs all): ' % sol, clf.solution[sol].weights)

# --- evaluate the solution ---
y_pred = clf.predict(KLte)  # predictions
y_score = clf.decision_function(KLte)  # rank
accuracy = accuracy_score(Yte, y_pred)
print('Accuracy score: %.3f' % (accuracy))

print('training EasyMKL with one-vs-one multiclass strategy...', end='')
Xtr, degree=d) for d in range(4) ] print('done') # ''' Compute RBF Kernels''' # gamma_range = np.logspace(-9, 3, 13) # ker_list = [rbf_kernel(Xtr, gamma=g) for g in gamma_range] # and train 3 classifiers ### clf = AverageMKL().fit( KLtr, ytr) # a wrapper for averaging kernels # print(clf.weights) # print the weights of the combination of base kernels print('training EasyMKL...for polynomials and RBF') clfEasy = EasyMKL(lam=0.1).fit( KLtr, ytr ) # combining kernels with the EasyMKL algorithm # clfRBF = EasyMKL(lam=0.1).fit(ker_list, ytr) print('------') print('finished training') except: count_i += 1 print(count_i) print(i, "hin failed here!") continue else: print('Shapes dont match.') pass print('Average Kernel Testing')
# Imports used by this part of the script.
# (KOMD is not an MKL algorithm but a simple kernel machine, like the SVM)
from MKLpy.algorithms import EasyMKL, KOMD
from MKLpy.model_selection import cross_val_score, cross_val_predict
from MKLpy.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
import numpy as np

# Normalised monotone conjunctive kernels for c = 0..4.
conjunctive_kernels = (
    pairwise.monotone_conjunctive_kernel(Xbin, c=c) for c in range(5)
)
KL = [kernel_normalization(K) for K in conjunctive_kernels]
print ('done')

# train/test KL split (N.B. the kernel list itself is split here)
KLtr, KLte, Ytr, Yte = train_test_split(KL, Y, test_size=.3, random_state=42)

# --- model selection: keep the lambda with the best cross-validated score ---
print ('tuning lambda for EasyMKL...', end='')
base_learner = SVC(C=10000)  # very large C: close to a hard-margin SVM
lam_candidates = [0, 0.01, 0.1, 0.2, 0.9, 1]  # lambda values tried for EasyMKL
best_results = {}
for lam in lam_candidates:
    # cross_val_predict runs the 5-fold cross validation itself and is asked
    # for accuracy here. The original author warns that these MKLpy helpers
    # are expected to change in a later version.
    candidate = EasyMKL(estimator=base_learner, lam=lam)
    scores = cross_val_predict(KLtr, Ytr, candidate, n_folds=5, score='accuracy')
    acc = np.mean(scores)
    if not best_results or best_results['score'] < acc:
        best_results = {'lam' : lam, 'score' : acc}
print ('done')

# --- evaluation on the test set with the selected lambda ---
clf = EasyMKL(estimator=base_learner, lam=best_results['lam']).fit(KLtr, Ytr)
y_pred = clf.predict(KLte)
accuracy = accuracy_score(Yte, y_pred)
print ('accuracy on the test set: %.3f, with lambda=%.2f' % (accuracy, best_results['lam']))
def fitting_function_mkl(key):
    """Cross-validate EasyMKL for one label file and pickle the collected scores.

    Reads ``<key>.csv`` from the label directory of the current symbol, joins
    the HMM features stored under ``symbol_feature_paths[key]`` with market
    features computed from the labels frame, builds homogeneous polynomial
    kernels (degrees 1-10 plus an identity kernel) and runs 5-fold cross
    validation of EasyMKL for every (C, lam) pair. Each result is stored in
    the module-level ``cv_dict_list`` and the whole dictionary is pickled.

    The function relies on names defined outside of it (``symbolData``,
    ``label_idx``, ``symbol_feature_paths``, ``cv_dict_list``, ``symbol``,
    ``hmm_date``, ``mainPath``, ``alternate_labels_nos``).

    Args:
        key: name shared by the label CSV file and the feature-path entry.

    Returns:
        None. Returns early when the label file does not exist, when every
        HMM feature is NaN, or when fewer than 10 usable rows remain.
    """
    print('For key: ', key, '############')
    labels_file_path = os.path.join(
        symbolData.symbol_specific_label_path(label_idx), key + ".csv")
    labels_file_exists = os.path.isfile(labels_file_path)
    print(labels_file_exists)
    if not labels_file_exists:  # nothing to train on without labels
        return

    print(" reading labels")
    labels = pd.read_csv(labels_file_path)
    # first column whose name contains 'label'
    label_name = str(
        labels.columns[labels.columns.str.contains(pat='label')].values[0])
    logmemoryusage("Before garbage collect")

    hmm_features = nfu.hmm_features_df(
        open_pickle_filepath(symbol_feature_paths[key]))
    if hmm_features.isnull().values.all():
        print('lots of NaNs on features')
        return

    print("can train")
    # Market features are built by chaining four CreateMarketFeatures steps,
    # each one consuming the frame produced by the previous step.
    with_duration = CreateMarketFeatures(df=labels).ma_spread_duration()
    with_spread = CreateMarketFeatures(df=with_duration).ma_spread()
    with_chaikin = CreateMarketFeatures(with_spread).chaikin_mf()
    market_features_df = CreateMarketFeatures(with_chaikin).obv_calc()

    # `sort` must be a real boolean: the original passed the string 'False',
    # which is truthy and is not a valid value for this boolean parameter.
    df_concat = pd.DataFrame(
        pd.concat([hmm_features, market_features_df], axis=1,
                  sort=False).dropna())
    df = df_concat[df_concat[label_name].notna()]
    df_final = df.drop(columns=[
        'TradedPrice', 'Duration', 'TradedTime', 'ReturnTradedPrice',
        'Volume', label_name
    ])
    # training labels: every column whose name contains 'label'
    y_train = df.reindex(
        columns=df.columns[df.columns.str.contains(pat='label')])
    print('go to the labels')

    if df_final.shape[0] < 10:
        print(' the ratio of classes is too low. try another label permutation')
        return

    print("starting model fit")
    Xtr, Xte, Ytr, Yte = train_test_split(df_final, y_train,
                                          test_size=.2, random_state=42)
    # training split
    X_tr = normalization(rescale_01(np.array(Xtr)))
    Y_tr = torch.Tensor(Ytr.values.ravel())
    # testing split -- prepared here but not evaluated by this function
    X_te = normalization(rescale_01(np.array(Xte)))
    Y_te = torch.Tensor(Yte.values.ravel())

    degrees = range(1, 11)
    KLtr = [
        pairwise.homogeneous_polynomial_kernel(X_tr, degree=d)
        for d in degrees
    ]
    KLtr.append(identity_kernel(len(Y_tr)))
    KLte = [
        pairwise.homogeneous_polynomial_kernel(X_te, X_tr, degree=d)
        for d in degrees
    ]
    # all-zero block matching the identity kernel added to the training list
    KLte.append(torch.zeros(KLte[0].size()))
    print('done with kernel')

    lam_values = [0.1, 0.2, 1]  # lambda values tried for EasyMKL
    C_range = [0.1, 1]
    best_results = {}
    try:
        for C_ch in C_range:
            print(' fitted the base learner')
            for lam in lam_values:
                print('now here', lam)
                print(' and tuning lambda for EasyMKL...', end='')
                base_learner = SVC(C=C_ch)  # "soft"-margin svm
                scores = cross_val_score(
                    KLtr, Y_tr, EasyMKL(learner=base_learner, lam=lam),
                    n_folds=5, scoring='accuracy')
                acc = np.mean(scores)
                if not best_results or best_results['score'] < acc:
                    best_results = {'lam': lam, 'score': acc}
                print('done', best_results)

                cv_dict_list[(symbol, hmm_date, label_idx)][(lam, C_ch)] = [
                    scores, best_results
                ]
                print(cv_dict_list)

                # NOTE(review): block structure was reconstructed; results are
                # re-dumped after every (lam, C) evaluation -- confirm.
                pickle_out_filename = os.path.join(
                    mainPath, "ExperimentCommonLocs/MKLFittedModels",
                    "_".join((symbol, 'model_fit_date', str(key),
                              str(alternate_labels_nos[label_idx]),
                              'MultiKernelSVC.pkl')))
                print(pickle_out_filename)
                # context manager: the handle is closed even if dump() fails
                with open(pickle_out_filename, 'wb') as pickle_out:
                    pickle.dump(cv_dict_list, pickle_out)
    except (ValueError, TypeError, EOFError) as err:
        # still best-effort, but no longer silent
        print('cross validation failed for key', key, ':', repr(err))
K_list_tr[counter, :, :] = my_kernel(Xtr, Xtr, jcount) counter += 1 K_list_tr_te = np.zeros( [Number_of_widths, Test_size, Training_size]) counter = 0 for jcount in np.arange(Min_Width, Max_Width, (Max_Width - Min_Width) / Number_of_widths): K_list_tr_te[counter, :, :] = my_kernel(Xte, Xtr, jcount) counter += 1 ax = EasyMKL(lam=0.1, kernel='precomputed') ker_matrix_tr = ax.arrange_kernel(K_list_tr, Ytr) kernel_weights = ax.weights kernel_weights = np.reshape(kernel_weights, [-1, 1, 1]) K_tr = np.multiply(kernel_weights, K_list_tr_te) K_tr = np.sum(K_tr, axis=0) clf = SVC(C=2, kernel='precomputed').fit(ker_matrix_tr, Ytr) predictions = clf.predict(K_tr) predictions_storer[icount, :] = predictions #print(icount) ## v = predictions == Yte.T v.astype(np.float) c = np.sum(v, axis=1)
nalsvm.logmemoryusage("Before garbage collect") Xtr = normalization(rescale_01(torch.Tensor(pkl_file[date][0].values))) Ytr = torch.Tensor(pkl_file[date][1].values) print('first bit done') nalsvm.gc.collect() KLrbf = generators.RBF_generator(Xtr, gamma=[.001, .01, .1]) print('done with kernel') nalsvm.gc.collect() try: lam_values = [0, 0.1, 0.2, 1] C_values = [0.01, 1, 10, 100] print(C_values) for lam, C in product(lam_values, C_values): print('now here', C, lam) svm = SVC(C=C) mkl = EasyMKL(lam=lam, learner=svm) scores = cross_val_score(KLrbf, Ytr, mkl, n_folds=3, scoring='accuracy') print(str(scores)) print(lam, C, scores) print(type(scores)) cv_dict_list[(symbol, date, alternate_label)][(lam, C)] = scores nalsvm.logmemoryusage("Before garbage collect") print('---------------> moving on') except (ValueError, TypeError, EOFError): continue # only way that seems to work for this pickle_out_filename = os.path.join(cross_validation_data_location, "_".join((symbol, date, 'RBF_CrossValidationResults.pkl'))) test_df = pd.DataFrame.from_dict(cv_dict_list) test_df.to_pickle(pickle_out_filename)
# Imports used by this part of the script.
# (KOMD is not an MKL algorithm but a simple kernel machine, like the SVM)
from MKLpy.algorithms import EasyMKL, KOMD
from MKLpy.model_selection import cross_val_score, cross_val_predict
from MKLpy.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
import numpy as np

# Normalised monotone conjunctive kernels for c = 0..4.
conjunctive_kernels = (
    pairwise.monotone_conjunctive_kernel(Xbin, c=c) for c in range(5)
)
KL = [kernel_normalization(K) for K in conjunctive_kernels]
print ('done')

# train/test KL split (N.B. the kernel list itself is split here)
KLtr, KLte, Ytr, Yte = train_test_split(KL, Y, test_size=.3, random_state=42)

# --- model selection: keep the lambda with the best cross-validated score ---
print ('tuning lambda for EasyMKL...', end='')
base_learner = SVC(C=10000)  # very large C: close to a hard-margin SVM
lam_candidates = [0, 0.01, 0.1, 0.2, 0.9, 1]  # lambda values tried for EasyMKL
best_results = {}
for lam in lam_candidates:
    # cross_val_predict runs the 5-fold cross validation itself and is asked
    # for accuracy here. The original author warns that these MKLpy helpers
    # are expected to change in a later version.
    candidate = EasyMKL(learner=base_learner, lam=lam)
    scores = cross_val_predict(KLtr, Ytr, candidate, n_folds=5, score='accuracy')
    acc = np.mean(scores)
    if not best_results or best_results['score'] < acc:
        best_results = {'lam' : lam, 'score' : acc}
print ('done')

# --- evaluation on the test set with the selected lambda ---
clf = EasyMKL(learner=base_learner, lam=best_results['lam']).fit(KLtr, Ytr)
y_pred = clf.predict(KLte)
accuracy = accuracy_score(Yte, y_pred)
print ('accuracy on the test set: %.3f, with lambda=%.2f' % (accuracy, best_results['lam']))
# 对测试数据进行处理 kernel_functions = [ k_helpers.create_histogram_kernel, k_helpers.create_histogram_kernel, # k_helpers.create_rbf_kernel(final_gamma), k_helpers.create_exponential_kernel(gamma), ] n_test = GLCM_X_test.shape[0] n_train = GLCM_X_train.shape[0] kernel_test_matrices = [] GLCM_test_matrics = np.empty((n_test, n_train)) FD_test_matrics = np.empty((n_test, n_train)) Harris_test_matrics = np.empty((n_test, n_train)) for i in range(n_test): for j in range(n_train): GLCM_test_matrics[i][j] = kernel_functions[0](GLCM_X_test[i], GLCM_X_train[j]) FD_test_matrics[i][j] = kernel_functions[1](FD_X_test[i], FD_X_train[j]) Harris_test_matrics[i][j] = kernel_functions[2](Harris_X_test[i], Harris_X_train[j]) kernel_test_matrices.append(GLCM_test_matrics) kernel_test_matrices.append(FD_test_matrics) kernel_test_matrices.append(Harris_test_matrics) final_test_data = k_helpers.get_combined_kernel(kernel_test_matrices, weights) MKL_kernel = EasyMKL(estimator=SVC(C=1)).arrange_kernel(final_train_data, y_train) clf_svc = SVC(C=1, kernel='precomputed') clf_svc.fit(MKL_kernel, y_train) score_SVC += clf_svc.score(final_test_data, y_test) print('一次循环的精度为%s' % (clf_svc.score(final_test_data, y_test))) print('SVC最后的分类精度:%s' % (score_SVC / 10))
else: try: X_train = MinMaxScaler().fit_transform(df_final) nalsvm.logmemoryusage("After feature creation") if X_train.shape[0] == y_labels_train.shape[0]: nalsvm.logmemoryusage("Before starting training") print('Shapes Match- starting training ') # polynomial Kernels ## try: KLtr = [pairwise.homogeneous_polynomial_kernel(X_train, degree=d) for d in range(4)] # KLte = [pairwise.homogeneous_polynomial_kernel(Xte, Xtr, degree=d) for d in range(4)] print('done') clf = AverageMKL().fit(KLtr, y_labels_train) # a wrapper for averaging kernels # print(clf.weights) # print the weights of the combination of base kernels print('training EasyMKL...for polynomials and RBF') clfEasy = EasyMKL(lam=0.1).fit(KLtr, y_labels_train) # combining kernels with the EasyMKL algorithm print('------') print('finished training') # somewhere here you need to do out of sample testing and then store all that symbolForwardDates = data_cls.forwardDates(joint_keys, joint_keys[joint_key_idx]) oos_svc_predictions = defaultdict(dict) # alias to store the data : symbol, joint Date, Label Used results_predict_alias = "_".join((symbol, joint_keys[joint_key_idx, nalsvm.labels_pickle_files[alternate_label_idx])) for forward_date_idx, forward_date in enumerate(symbolForwardDates): features_oos, labels_oos = nalsvm.ticker_features_labels(nalsvm.jointLocationsDictionary[symbolForwardDates[forward_date_idx]]) if nalsvm.hmm_features_df(features_oos).isnull().values.all(): print('Problem') ## need to get all the data out for KLte = [pairwise.homogeneous_polynomial_kernel(Xte, X_train, degree=d) for d in range(4)] print('done')
rescale_01(torch.Tensor( pkl_file[model_date][0].values))) # fitting model # put the labels in a tensor format Ytr = torch.Tensor(pkl_file[model_date][1].values) print('first bit done') # force garbage collect nalsvm.gc.collect() # kernels KLrbf = generators.RBF_generator(Xtr, gamma=[.001, .01, .1]) # dont need the next bit print('done with kernel') print(forward_dates) # base learner- use c =1 or 10 # the c and lambda values need to be picked up by the cross-val results ! base_learner = SVC(C=10) clf = EasyMKL(lam=0.2, multiclass_strategy='ova', learner=base_learner).fit(KLrbf, Ytr) # try ovo as # well mkl_avg = AverageMKL().fit(KLrbf, Ytr) print('done') print('the combination weights are:') # this bit may be redundant here and we can put it somewhere else for sol in clf.solution: print( '(%d vs all): ' % sol, clf.solution[sol].weights ) #dont need this loop- can make it redundant in another file except: continue
from sklearn.metrics import accuracy_score, roc_auc_score

# --- select the EasyMKL lambda by cross-validated accuracy on k1 ---
lam_grid = [0, 0.0001, 0.0009, 0.001, 0.009, 0.01, 0.09, 0.1, 0.2, 0.9, 1]
best_results = {}
for lam in lam_grid:
    # a new grid-searched SVC is built for every lambda
    base_learner = GridSearchCV(
        svm.SVC(probability=True),
        param_grid=param_grid,
        cv=cv,
        refit='AUC',
        error_score=0,
        pre_dispatch='1*n_jobs',
        n_jobs=1,
    )
    mkl_candidate = EasyMKL(learner=base_learner, lam=lam)
    scores = cross_val_score(k1, y_train_A, mkl_candidate,
                             cv=cv, n_folds=5, scoring='accuracy')
    acc = np.mean(scores)
    if not best_results or best_results['score'] < acc:
        best_results = {'lam': lam, 'score': acc}

# --- EasyMKL fitted with the selected lambda ---
# base_learner is the instance created in the last loop iteration
clf = EasyMKL(learner=base_learner, lam=best_results['lam']).fit(k1, y_train_A)
print(clf)
def parallelised_function(file):
    """Tune and fit EasyMKL on RBF kernels for every feature/label pair of one file.

    ``file`` is looked up under ``jointFeatureLocation`` and unpickled into a
    two-level mapping ``{hmm_date: {feature_label_date: (features_path,
    labels_path, ...)}}``. For each pair whose feature file exists, HMM and
    market features are joined, RBF kernels are generated, (C, lam) is chosen
    by 5-fold cross-validated accuracy and EasyMKL is refitted with the best
    pair; its accuracy on the training data is printed.

    The function relies on names defined outside of it (``jointFeatureLocation``,
    ``mainPath``, ``nfu``, ``generators``, ...).

    Args:
        file: file name (not a full path); the text before the first ``_`` is
            used as the symbol.

    Returns:
        None. Nothing is written to disk: the output file name is built, but
        the pickle dump is disabled in the original code.
    """
    select_file_path = os.path.join(jointFeatureLocation, file)
    symbol = file.split("_")[0]
    print('Symbol:----->', symbol)

    # NOTE(review): these fields are taken from the *joined* path, so the
    # indices shift if jointFeatureLocation itself contains underscores --
    # confirm against the real directory layout.
    path_fields = select_file_path.split("_")
    select_feature_label_date = path_fields[6]
    select_label_idx = path_fields[9]

    unpickled_select_file = open_pickle_filepath(select_file_path)

    for hmm_date_key in sorted(unpickled_select_file.keys()):
        dated_entries = unpickled_select_file[hmm_date_key]
        # each key here unlocks one feature/label set
        for feature_label_date in sorted(dated_entries.keys()):
            features_file_path = dated_entries[feature_label_date][0]
            labels_file_path = dated_entries[feature_label_date][1]

            if not os.path.isfile(features_file_path):
                print('PROBLEM----->in one of of your locations')
                continue

            print('ok----->', feature_label_date)
            labels = pd.read_csv(labels_file_path)
            # first column whose name contains 'label'
            label_name = str(
                labels.columns[labels.columns.str.contains(
                    pat='label')].values[0])
            features = open_pickle_filepath(features_file_path)
            hmm_features = nfu.hmm_features_df(features)
            print('loaded features and labels ')

            if hmm_features.isnull().values.all():
                continue  # nothing usable in the HMM features

            # Market features are built by chaining four CreateMarketFeatures
            # steps, each one consuming the frame produced by the previous.
            with_duration = CreateMarketFeatures(
                df=labels).ma_spread_duration()
            with_spread = CreateMarketFeatures(df=with_duration).ma_spread()
            with_chaikin = CreateMarketFeatures(with_spread).chaikin_mf()
            market_features_df = CreateMarketFeatures(with_chaikin).obv_calc()

            # `sort` must be a real boolean: the original passed the string
            # 'False', which is truthy and is not a valid value for this
            # boolean parameter.
            df_concat = pd.DataFrame(
                pd.concat([hmm_features, market_features_df], axis=1,
                          sort=False).dropna())
            df = df_concat[df_concat[label_name].notna()]
            df_final = df.drop(columns=[
                'TradedPrice', 'Duration', 'TradedTime', 'ReturnTradedPrice',
                'Volume', label_name
            ])
            # training labels: first column whose name contains 'label'
            y_train = df[df.columns[df.columns.str.contains(
                pat='label')]].iloc[:, 0]

            if df_final.shape[0] < 10:  # make sure it all looks reasonable
                print(' the ratio of classes is too low. try another label permutation')
                continue

            print("starting model fit")
            X = np.asarray(df_final.values)
            Xtr = normalization(rescale_01(torch.Tensor(X)))
            Ytr = torch.Tensor(y_train.values)
            print('-----------------first bit done------------------')
            KLrbf = generators.RBF_generator(Xtr, gamma=[.01, .1, .25, .5])
            print('done with kernel')

            best_results = {}
            C_range = [0.1, 1]
            lam_range = [0.2]  # lambda values tried for EasyMKL
            try:
                for C_choice in C_range:
                    base_learner = SVC(C=C_choice)
                    for lam in lam_range:
                        scores = cross_val_score(
                            KLrbf, Ytr,
                            EasyMKL(learner=base_learner, lam=lam),
                            n_folds=5, scoring='accuracy')
                        acc = np.mean(scores)
                        if not best_results or best_results['score'] < acc:
                            best_results = {
                                'C': C_choice,
                                'lam': lam,
                                'score': acc,
                                'scores': scores
                            }
                print('done')

                best_learner = SVC(C=best_results['C'])
                clf = EasyMKL(learner=best_learner,
                              lam=best_results['lam']).fit(KLrbf, Ytr)
                # Predictions are made on the training kernels, so this is a
                # training-set accuracy (the original message said "test set").
                y_pred = clf.predict(KLrbf)
                accuracy = accuracy_score(Ytr, y_pred)
                print('accuracy on the training set: %.3f, with lambda=%.2f' %
                      (accuracy, best_results['lam']))
                print(scores)

                # TODO: best_results is not persisted; the original code builds
                # this name but leaves the pickle dump commented out.
                pickle_out_filename = os.path.join(
                    mainPath, "ExperimentCommonLocs/CrossValidationResults",
                    "_".join((symbol, 'feature_label_date',
                              str(select_feature_label_date),
                              str(select_label_idx), 'hmm_date:',
                              hmm_date_key, 'RBF', 'MultiKernelSVC.pkl')))
            except ValueError as err:
                # still best-effort, but no longer silent
                print('model fit failed for', feature_label_date, ':',
                      repr(err))
                continue
def Learning_curve_using_weather_data(
        data_path=r"D:\CVProject\CBAM-keras-master\handcraft\features_with_pca.mat",
        results_path=r"D:\CVProject\CBAM-keras-master\handcraft\results\learning_curve_results_0202_01.txt"):
    """Compute an EasyMKL learning curve and write it to a text file.

    The ``'array'`` matrix of the .mat file is loaded: every column except
    the last is a feature, the last column is the label. Features are
    rescaled to [0, 1] and normalised. For growing prefixes of the data
    (10% ... 100%) ``bulid_kernel_transform`` builds the train/test kernels,
    a one-vs-all EasyMKL with an ``SVC(C=20)`` base learner is fitted, and
    train/test accuracy is printed and appended to the results file.

    Args:
        data_path: .mat file holding the ``'array'`` matrix. Defaults to the
            path that was hard-coded originally.
        results_path: text file receiving one line per training fraction;
            it is overwritten. Defaults to the path that was hard-coded
            originally.

    Returns:
        None.
    """
    from MKLpy.algorithms import EasyMKL
    from MKLpy.preprocessing import normalization, rescale_01
    from sklearn.metrics import accuracy_score
    from sklearn.svm import SVC

    # load data
    print('loading dataset...', end='')
    training_data = io.loadmat(data_path)
    samples = training_data['array']
    length = len(samples[0])
    X, Y = samples[:, 0:length - 1], samples[:, -1]
    print('done')

    # preprocess data
    print('preprocessing data...', end='')
    X = rescale_01(X)  # feature scaling in [0,1]
    X = normalization(X)  # ||X_i||_2^2 = 1
    print('done')

    print("Build a base learner")
    base_learner = SVC(C=20)
    print("Build EasyMKL classifier")

    # fractions of the data set used for the successive points of the curve
    fractions = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]

    # The context manager closes (and flushes) the results file even if one
    # of the fits fails; the original never closed it.
    with open(results_path, "w") as results_data:
        for elem in fractions:
            learn_count = int(elem * X.shape[0])
            KLtr, KYtr, KLte, KYte = bulid_kernel_transform(
                X[:learn_count], Y[:learn_count])
            train_count, test_count = len(KYtr), len(KYte)

            clf = EasyMKL(lam=0.1, multiclass_strategy='ova',
                          learner=base_learner).fit(KLtr, KYtr)
            y_train_pred = clf.predict(KLtr)
            y_test_pred = clf.predict(KLte)
            train_set_accuracy = accuracy_score(KYtr, y_train_pred)
            test_set_accuracy = accuracy_score(KYte, y_test_pred)

            summary = "Test on {0} train samples and {1} test samples,".format(
                train_count, test_count)
            details = (
                'accuracy on the train set: %.3f and accuracy on the test set : %.3f'
                % (train_set_accuracy, test_set_accuracy))
            # same two-part line on the console (file=None means stdout),
            # then in the results file
            for stream in (None, results_data):
                print(summary, end="", file=stream)
                print(details, file=stream)

    print('done')
] KLte = [ pairwise.homogeneous_polynomial_kernel(Xte, Xtr, degree=d) for d in range(11) ] print('done') #MKL algorithms from MKLpy.algorithms import AverageMKL, EasyMKL, KOMD #KOMD is not a MKL algorithm but a simple kernel machine like the SVM print('training AverageMKL...', end='') clf = AverageMKL().fit(KLtr, Ytr) #a wrapper for averaging kernels print('done') K_average = clf.solution.ker_matrix #the combined kernel matrix print('training EasyMKL...', end='') clf = EasyMKL(lam=0.1).fit(KLtr, Ytr) #combining kernels with the EasyMKL algorithm #lam is a hyper-parameter in [0,1] print('done') print('the combination weights are:') print(clf.solution.weights) #evaluate the solution from sklearn.metrics import accuracy_score, roc_auc_score y_pred = clf.predict(KLte) #predictions y_score = clf.decision_function(KLte) #rank accuracy = accuracy_score(Yte, y_pred) roc_auc = roc_auc_score(Yte, y_score) print('Accuracy score: %.3f, roc AUC score: %.3f' % (accuracy, roc_auc)) #select the base-learner #MKL algorithms use a hard-margin SVM as base learned (or KOMD in the case of EasyMKL).
def MultiView_learning():
    """Train and evaluate multiclass EasyMKL on one RBF kernel per feature view.

    Loads the ``'array'`` matrix from the .mat file (all columns but the last
    two are features, the last column is the label), rescales and normalises
    the features, holds out 10% for testing, slices the columns into ten
    views and builds one RBF kernel per view. EasyMKL is then trained with
    the one-vs-all and the one-vs-one strategy; combination weights,
    accuracy, macro recall and the confusion matrix are printed for both.
    """
    print('loading dataset...', end='')
    mat_contents = io.loadmat(
        r"D:\CVProject\CBAM-keras-master\handcraft\features_with_pca_file_0202.mat"
    )
    samples = mat_contents['array']
    n_columns = len(samples[0])
    X, Y = samples[:, 0:n_columns - 2], samples[:, -1]
    print('done')

    # preprocess data
    print('preprocessing data...', end='')
    from MKLpy.preprocessing import normalization, rescale_01
    X = normalization(rescale_01(X))  # scale to [0,1], then ||X_i||_2^2 = 1

    # train/test split
    from sklearn.model_selection import train_test_split
    Xtr, Xte, Ytr, Yte = train_test_split(X, Y,
                                          test_size=.1,
                                          random_state=42,
                                          shuffle=True)
    print(numpy.array(Xtr).shape)
    print(numpy.array(Ytr).shape)
    print('done')
    print('Training on {0} samples, Testing on {1} samples'.format(
        len(Xtr), len(Xte)))

    print('computing RBF Kernels...', end='')
    from MKLpy.metrics import pairwise
    from MKLpy.generators import Multiview_generator

    # Column boundaries of the ten views, in order:
    # time, color, Gabor, lbp, cloud, haze, contrast, shadow, snow, pca.
    view_edges = [None, 2, 92, 124, 156, 348, 432, 603, 606, 608, None]
    view_columns = [
        slice(start, stop) for start, stop in zip(view_edges, view_edges[1:])
    ]
    views_tr = [numpy.array(Xtr[:, cols]) for cols in view_columns]
    views_te = [numpy.array(Xte[:, cols]) for cols in view_columns]

    KLtr = Multiview_generator(views_tr, kernel=pairwise.rbf_kernel)
    KLte = Multiview_generator(views_te, list(views_tr),
                               kernel=pairwise.rbf_kernel)
    print('done')

    from MKLpy.algorithms import AverageMKL, EasyMKL
    print('training EasyMKL with one-vs-all multiclass strategy...', end='')
    from sklearn.svm import SVC
    base_learner = SVC(C=8)
    clf = EasyMKL(lam=0.1, multiclass_strategy='ova',
                  learner=base_learner).fit(KLtr, Ytr)
    print('the combination weights are:')
    for sol in clf.solution:
        print('(%d vs all): ' % sol, clf.solution[sol].weights)

    from sklearn.metrics import accuracy_score, roc_auc_score, recall_score, confusion_matrix

    def _report_on_test_set(model):
        # Print accuracy, macro recall and confusion matrix on the test kernels.
        y_pred = model.predict(KLte)  # predictions
        y_score = model.decision_function(KLte)  # rank (computed, not printed)
        accuracy = accuracy_score(Yte, y_pred)
        print('Accuracy score: %.4f' % (accuracy))
        recall = recall_score(Yte, y_pred, average='macro')
        print('Recall score: %.4f' % (recall))
        cm = confusion_matrix(Yte, y_pred)
        print('Confusion matrix', cm)

    _report_on_test_set(clf)

    print('training EasyMKL with one-vs-one multiclass strategy...', end='')
    clf = EasyMKL(lam=0.1, multiclass_strategy='ovo',
                  learner=base_learner).fit(KLtr, Ytr)
    print('done')
    print('the combination weights are:')
    for sol in clf.solution:
        print('(%d vs %d): ' % (sol[0], sol[1]), clf.solution[sol].weights)

    _report_on_test_set(clf)
# --- select the EasyMKL lambda by cross-validated accuracy on k1 ---
lam_grid = [0, 0.0001, 0.0009, 0.001, 0.009, 0.01, 0.09, 0.1, 0.2, 0.9, 1]
best_results = {}
for lam in lam_grid:
    # a new grid-searched SVC is built for every lambda
    base_learner = GridSearchCV(
        svm.SVC(probability=True),
        param_grid=param_grid,
        cv=cv,
        refit='AUC',
        error_score=0,
        pre_dispatch='1*n_jobs',
        n_jobs=1,
    )
    mkl_candidate = EasyMKL(learner=base_learner, lam=lam)
    scores = cross_val_score(k1, y_tr_A, mkl_candidate,
                             cv=cv, n_folds=5, scoring='accuracy')
    acc = np.mean(scores)
    if not best_results or best_results['score'] < acc:
        best_results = {'lam': lam, 'score': acc}

# --- EasyMKL fitted with the selected lambda ---
# NOTE(review): lambda was selected on k1 only, while the final model is
# fitted on k1 + k2 + ... + k6 -- confirm this difference is intended.
# base_learner is the instance created in the last loop iteration.
clf = EasyMKL(learner=base_learner, lam=best_results['lam']).fit(k1 + k2 + k3 + k4 + k5 + k6, y_tr_A)
print(clf)
import numpy as np
from MKLpy.algorithms import EasyMKL
from MKLpy.metrics import pairwise

ds = load_iris()
X, Y = ds.data, ds.target
classes = np.unique(Y)
print('done [%d classes]' % len(classes))

# WARNING: be sure that your matrix is not sparse!
# EXAMPLE:
#     from sklearn.datasets import load_svmlight_file
#     X, Y = load_svmlight_file(...)
#     X = X.toarray()

# compute homogeneous polynomial kernels with degrees 1, 2 and 3
print('computing Homogeneous Polynomial Kernels...', end='')
poly_degrees = range(1, 4)
KL = [pairwise.homogeneous_polynomial_kernel(X, degree=d) for d in poly_degrees]
print('done')

# combine the kernels with the EasyMKL algorithm;
# multiclass_strategy is 'ovo' for the one-vs-one decomposition and
# 'ova' for the one-vs-all/rest decomposition
print('training EasyMKL...', end='')
ovo_mkl = EasyMKL(lam=0.1, multiclass_strategy='ovo')
clf = ovo_mkl.fit(KL, Y)
print('done')
print(clf.weights)