def test_type(self):
    Kte_torch = pairwise_mk.homogeneous_polynomial_kernel(self.Xte, self.Xtr, degree=4)
    self.assertEqual(type(pairwise_mk.homogeneous_polynomial_kernel(self.Xtr)), torch.Tensor)
    self.assertEqual(type(pairwise_mk.homogeneous_polynomial_kernel(self.Xtr.tolist())), torch.Tensor)
    self.assertEqual(type(Kte_torch), torch.Tensor)
    self.assertTrue(matNear(
        Kte_torch,
        pairwise_mk.homogeneous_polynomial_kernel(self.Xte.numpy(), self.Xtr.numpy(), degree=4)))
    self.assertTrue(matNear(
        Kte_torch,
        pairwise_mk.homogeneous_polynomial_kernel(self.Xte.tolist(), self.Xtr.tolist(), degree=4)))
def setUp(self):
    data = load_digits()
    self.Xtr, self.Xte, self.Ytr, self.Yte = train_test_split(
        data.data, data.target, shuffle=True, train_size=.2)
    self.Xtr = preprocessing.normalization(self.Xtr)
    self.Xte = preprocessing.normalization(self.Xte)
    self.KLtr = [
        pairwise.homogeneous_polynomial_kernel(self.Xtr, degree=d)
        for d in range(1, 11)
    ]
    self.KLte = [
        pairwise.homogeneous_polynomial_kernel(self.Xte, self.Xtr, degree=d)
        for d in range(1, 11)
    ]
def setUp(self):
    super().setUp()
    self.XLtr = [self.Xtr + i for i in range(5)]
    self.XLte = [self.Xte + i for i in range(5)]
    self.kf = lambda _X, _Z: pairwise_mk.homogeneous_polynomial_kernel(_X, _Z, degree=2)
    self.KLtr = [self.kf(X, X) for X in self.XLtr]
    self.KLte = [self.kf(Xt, X) for Xt, X in zip(self.XLte, self.XLtr)]
def setUp(self):
    data = load_breast_cancer()
    self.Xtr, self.Xte, self.Ytr, self.Yte = train_test_split(
        data.data, data.target, shuffle=True, train_size=50)
    self.Xtr = preprocessing.normalization(self.Xtr)
    self.Xte = preprocessing.normalization(self.Xte)
    self.KLtr = [
        pairwise.homogeneous_polynomial_kernel(self.Xtr, degree=d)
        for d in range(1, 11)
    ]
    self.KLte = [
        pairwise.homogeneous_polynomial_kernel(self.Xte, self.Xtr, degree=d)
        for d in range(1, 11)
    ]
    self.KLtr_g = HPK_generator(self.Xtr, degrees=range(1, 6))
    self.KLte_g = HPK_generator(self.Xte, self.Xtr, degrees=range(1, 6))
def test_HPK_test(self):
    Ktr = linear_kernel(self.Xtr)
    Kte = self.Xte.dot(self.Xtr.T)
    self.assertTrue(matNear(
        Kte,
        pairwise.homogeneous_polynomial_kernel(self.Xte, self.Xtr, degree=1)))
    self.assertTrue(matNear(
        pairwise.homogeneous_polynomial_kernel(self.Xte, self.Xtr, degree=4),
        polynomial_kernel(self.Xte, self.Xtr, degree=4, gamma=1, coef0=0)))
def setUp(self):
    data = load_breast_cancer()
    self.Xtr, self.Xte, self.Ytr, self.Yte = train_test_split(
        data.data, data.target, shuffle=True, train_size=50)
    self.Xtr = preprocessing.normalization(self.Xtr)
    self.Xte = preprocessing.normalization(self.Xte)
    self.KLtr = [
        pairwise_mk.homogeneous_polynomial_kernel(self.Xtr, degree=d)
        for d in range(5, 11)
    ] + [misc.identity_kernel(len(self.Xtr))]  # .Double()
    self.KLte = [
        pairwise_mk.homogeneous_polynomial_kernel(self.Xte, self.Xtr, degree=d)
        for d in range(5, 11)
    ] + [torch.zeros(len(self.Xte), len(self.Xtr))]  # dtype=torch.double
    self.KLtr_g = HPK_generator(self.Xtr, degrees=range(5, 11), include_identity=True)
    self.KLte_g = HPK_generator(self.Xte, self.Xtr, degrees=range(5, 11), include_identity=True)
def test_HPK_train(self):
    Ktr = self.Xtr.dot(self.Xtr.T)
    self.assertTrue(matNear(Ktr, linear_kernel(self.Xtr)))
    self.assertTrue(matNear(
        pairwise.homogeneous_polynomial_kernel(self.Xtr, degree=4),
        polynomial_kernel(self.Xtr, degree=4, gamma=1, coef0=0)))
    self.assertTrue(matNear(
        pairwise.homogeneous_polynomial_kernel(self.Xtr, degree=5),
        polynomial_kernel(self.Xtr, degree=5, gamma=1, coef0=0)))
    self.assertTrue(matNear(
        Ktr**3,
        polynomial_kernel(self.Xtr, degree=3, gamma=1, coef0=0)))
    self.assertTrue(matNear(
        pairwise.homogeneous_polynomial_kernel(self.Xtr, self.Xtr, degree=3),
        polynomial_kernel(self.Xtr, self.Xtr, degree=3, gamma=1, coef0=0)))
def setUp(self):
    super().setUp()
    self.funcs = [
        pairwise_mk.linear_kernel,
        lambda X, Z: (X @ Z.T)**2,
        pairwise_mk.polynomial_kernel,
        lambda X, Z: pairwise_mk.polynomial_kernel(X, Z, degree=4),
    ]
    self.KLtr = [f(self.Xtr, self.Xtr) for f in self.funcs]
    self.KLte = [
        pairwise_mk.linear_kernel(self.Xte, self.Xtr),
        pairwise_mk.homogeneous_polynomial_kernel(self.Xte, self.Xtr, degree=2),
        pairwise_mk.polynomial_kernel(self.Xte, self.Xtr),
        pairwise_mk.polynomial_kernel(self.Xte, self.Xtr, degree=4),
    ]
def test_computation(self):
    Ktr = pairwise_mk.homogeneous_polynomial_kernel(self.Xtr, degree=1)
    self.assertTrue(matNear(Ktr, pairwise_sk.linear_kernel(self.Xtr)))
    self.assertTrue(matNear(
        pairwise_mk.homogeneous_polynomial_kernel(self.Xtr, degree=4),
        pairwise_sk.polynomial_kernel(self.Xtr, degree=4, gamma=1, coef0=0)))
    self.assertTrue(matNear(
        pairwise_mk.homogeneous_polynomial_kernel(self.Xtr, degree=5),
        pairwise_sk.polynomial_kernel(self.Xtr, degree=5, gamma=1, coef0=0)))
    self.assertTrue(matNear(
        Ktr**3,
        pairwise_sk.polynomial_kernel(self.Xtr, degree=3, gamma=1, coef0=0)))
    self.assertTrue(matNear(
        pairwise_mk.homogeneous_polynomial_kernel(self.Xtr, self.Xtr, degree=3),
        pairwise_sk.polynomial_kernel(self.Xtr, self.Xtr, degree=3, gamma=1, coef0=0)))
    # the degree-1 HPK between test and train examples should match the linear kernel
    self.assertTrue(matNear(
        pairwise_mk.homogeneous_polynomial_kernel(self.Xte, self.Xtr, degree=1),
        pairwise_sk.linear_kernel(self.Xte, self.Xtr)))
    self.assertTrue(matNear(
        pairwise_mk.homogeneous_polynomial_kernel(self.Xte, self.Xtr, degree=4),
        pairwise_sk.polynomial_kernel(self.Xte, self.Xtr, degree=4, gamma=1, coef0=0)))
ds = load_iris()
X, Y = ds.data, ds.target

from MKLpy.preprocessing import normalization
X = normalization(X)

from sklearn.model_selection import train_test_split
Xtr, Xte, Ytr, Yte = train_test_split(X, Y, test_size=.5, random_state=42)

from MKLpy.metrics import pairwise
from MKLpy.utils.matrices import identity_kernel
import numpy as np

# make 20 homogeneous polynomial kernels.
# I suggest adding the identity kernel to make GRAM's initial solution easily separable;
# if the initial solution is not separable, GRAM may not work well.
KLtr = [pairwise.homogeneous_polynomial_kernel(Xtr, degree=d) for d in range(1, 21)] + [identity_kernel(len(Ytr))]
KLte = [pairwise.homogeneous_polynomial_kernel(Xte, Xtr, degree=d) for d in range(1, 21)]
KLte.append(np.zeros(KLte[0].shape))

from MKLpy.algorithms import GRAM
from sklearn.svm import SVC

# play with max_iter (reduce it if the problem is big) and the learning rate!
clf = GRAM(max_iter=1000, learner=SVC(C=1000), learning_rate=1).fit(KLtr, Ytr)
print(clf.weights)
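# Not part of the original snippet: a minimal sketch of how the fitted GRAM
# classifier above might be evaluated on the test kernels, assuming the usual
# MKLpy predict() interface and that scikit-learn is available.
from sklearn.metrics import accuracy_score
y_pred = clf.predict(KLte)  # predictions from the test-vs-train kernel list
print('test accuracy: %.4f' % accuracy_score(Yte, y_pred))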
gc.collect()
logmemoryusage("Before feature creation")
Xtr, ytr, dataDate = featureCreation(i, HMMModelFeaturesLabelsCommon)
Xte, yte, testDate = featureCreation(i + 1, HMMModelFeaturesLabelsCommon)
logmemoryusage("After feature creation")
print('Doing Dates', dataDate)
if Xtr.shape[0] == ytr.shape[0]:
    logmemoryusage("Before starting training")
    print('Shapes match - starting training')
    # polynomial kernels
    KLtr = [
        pairwise.homogeneous_polynomial_kernel(Xtr, degree=d)
        for d in range(4)
    ]
    KLte = [
        pairwise.homogeneous_polynomial_kernel(Xte, Xtr, degree=d)
        for d in range(4)
    ]
    print('done')
    # compute RBF kernels:
    # gamma_range = np.logspace(-9, 3, 13)
    # ker_list = [rbf_kernel(Xtr, gamma=g) for g in gamma_range]
    # and train 3 classifiers
    clf = AverageMKL().fit(KLtr, ytr)
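# Not part of the original snippet: a minimal sketch of out-of-sample prediction
# with the averaged model above (assumes the standard MKLpy predict() interface).
from sklearn.metrics import accuracy_score
y_pred = clf.predict(KLte)  # predictions from the test-vs-train kernels
print('test accuracy: %.4f' % accuracy_score(yte, y_pred))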
# preprocess data
print('preprocessing data...', end='')
from MKLpy.preprocessing import normalization, rescale_01
X = rescale_01(X)     # feature scaling in [0,1]
X = normalization(X)  # ||X_i||_2^2 = 1

# train/test split
from sklearn.model_selection import train_test_split
Xtr, Xte, Ytr, Yte = train_test_split(X, Y, test_size=.25, random_state=42)
print('done')

# compute homogeneous polynomial kernels with degrees 0,1,2,...,10.
print('computing Homogeneous Polynomial Kernels...', end='')
from MKLpy.metrics import pairwise
KLtr = [pairwise.homogeneous_polynomial_kernel(Xtr, degree=d) for d in range(11)]
KLte = [pairwise.homogeneous_polynomial_kernel(Xte, Xtr, degree=d) for d in range(11)]
print('done')

# evaluate kernels in terms of margin, radius, etc.
print('evaluating metrics...', end='')
from MKLpy.metrics import margin, radius, ratio, trace, frobenius
from MKLpy.preprocessing import kernel_normalization
deg = 5
K = KLtr[deg]                # the HPK with degree 5
K = kernel_normalization(K)  # normalize the kernel K (useless for HPKs computed on normalized data)
score_margin = margin(K, Ytr)  # the distance between the positive and negative classes in the kernel space
score_radius = radius(K)       # the radius of the Minimum Enclosing Ball containing data in the kernel space
score_ratio = ratio(K, Ytr)    # the radius/margin ratio, defined as (radius**2/margin**2)/n_examples
# the ratio can also be computed as score_radius**2/score_margin**2/len(Ytr)
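# Not part of the original snippet: a quick numeric check of the identity stated
# in the last comment above, i.e. ratio(K, Ytr) == radius(K)**2 / margin(K, Ytr)**2 / n.
manual_ratio = float(score_radius)**2 / float(score_margin)**2 / len(Ytr)
print('ratio() = %.6f, radius^2/margin^2/n = %.6f' % (float(score_ratio), manual_ratio))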
for model_date in model_dates:
    start = time.time()
    # forward_dates = nalsvm.forwardDates(date_keys, model_date)
    print('---------------> Doing Model Date:', model_date)
    # put the features in a tensor format
    Xtr = rescale_01(torch.Tensor(pkl_file[model_date][0].values))
    Xtr = normalization(Xtr)
    # fitting model
    # put the labels in a tensor format
    Ytr = torch.Tensor(pkl_file[model_date][1].values)
    try:
        KLtr_poly = [pairwise.homogeneous_polynomial_kernel(Xtr, degree=d) for d in range(6)]
        deg = 5
        K = KLtr_poly[deg]  # the HPK with degree 5
        # K is always a square kernel matrix, i.e. it is not the kernel computed
        # between test and training examples.
        kernel_evaluation_dict = kernel_evaluation(K)
        print('done')
        print('results of the %d-degree HP kernel:' % deg)
        print('margin: %.4f, radius: %.4f, radius-margin ratio: %.4f,' %
              (kernel_evaluation_dict['score_margin'],
               kernel_evaluation_dict['score_radius'],
               kernel_evaluation_dict['score_ratio']))
        print('trace: %.4f, frobenius norm: %.4f' %
              (kernel_evaluation_dict['score_trace'],
               kernel_evaluation_dict['score_froben']))
        kernel_evaluation_results = dict()
        kernel_evaluation_results[model_date] = kernel_evaluation_dict
    except MKLpy.utils.exceptions.BinaryProblemError:
        pass
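# Not part of the original snippet: kernel_evaluation() is not defined above.
# A plausible sketch, consistent with the dictionary keys used by the print
# statements, built from MKLpy.metrics; it assumes the training labels Ytr are
# in scope when the helper is called (an assumption, not the author's code).
from MKLpy.metrics import margin, radius, ratio, trace, frobenius

def kernel_evaluation(K):
    return {
        'score_margin': margin(K, Ytr),  # class separation in the kernel space
        'score_radius': radius(K),       # radius of the Minimum Enclosing Ball
        'score_ratio':  ratio(K, Ytr),   # normalized radius/margin ratio
        'score_trace':  trace(K),        # trace of the kernel matrix
        'score_froben': frobenius(K),    # Frobenius norm of the kernel matrix
    }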
from MKLpy.preprocessing import normalization, rescale_01
X1 = rescale_01(X1)
X1 = normalization(X1)

# train/test split
X_train_A, X_test_A, y_train_A, y_test_A = train_test_split(X1, y1, test_size=0.3, random_state=42)

# apply the polynomial kernel
from MKLpy.metrics import pairwise
k1 = [
    pairwise.homogeneous_polynomial_kernel(X_train_A, degree=d)
    for d in range(5)
]
k11 = [
    pairwise.homogeneous_polynomial_kernel(X_test_A, X_train_A, degree=d)
    for d in range(5)
]

##################################################################################################
from MKLpy.algorithms import EasyMKL
from MKLpy.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold, GridSearchCV, LeaveOneOut
from sklearn.metrics import make_scorer, accuracy_score
from sklearn import svm
from itertools import product
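# Not part of the original snippet: a minimal sketch of how the kernel lists k1
# (train) and k11 (test vs. train) above might be used with EasyMKL, assuming
# binary labels in y_train_A / y_test_A; lam=0.1 mirrors the other examples here.
clf = EasyMKL(lam=0.1).fit(k1, y_train_A)  # combine the training kernels
y_pred = clf.predict(k11)                  # predict from the test kernels
print('accuracy: %.4f' % accuracy_score(y_test_A, y_pred))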
df_final = df.drop(columns=['TradedPrice', 'Duration', 'TradedTime', 'ReturnTradedPrice',
                            'Volume', label_name])
y_labels_train = df[df.columns[df.columns.str.contains(pat='label')]].iloc[:, 0]
if df_final.shape[0] < 10:
    print(' the ratio of classes is too low. try another label permutation')
    continue
else:
    try:
        X_train = MinMaxScaler().fit_transform(df_final)
        nalsvm.logmemoryusage("After feature creation")
        if X_train.shape[0] == y_labels_train.shape[0]:
            nalsvm.logmemoryusage("Before starting training")
            print('Shapes match - starting training')
            # polynomial kernels
            KLtr = [pairwise.homogeneous_polynomial_kernel(X_train, degree=d) for d in range(4)]
            # KLte = [pairwise.homogeneous_polynomial_kernel(Xte, Xtr, degree=d) for d in range(4)]
            print('done')
            clf = AverageMKL().fit(KLtr, y_labels_train)  # a wrapper for averaging kernels
            # print(clf.weights)  # print the weights of the combination of base kernels
            print('training EasyMKL...for polynomials and RBF')
            clfEasy = EasyMKL(lam=0.1).fit(KLtr, y_labels_train)  # combine kernels with the EasyMKL algorithm
            print('------')
            print('finished training')
            # out-of-sample testing happens somewhere here, and all of it is stored
            symbolForwardDates = data_cls.forwardDates(joint_keys, joint_keys[joint_key_idx])
            oos_svc_predictions = defaultdict(dict)
            # alias to store the data: symbol, joint date, label used
            results_predict_alias = "_".join((symbol,
                                              joint_keys[joint_key_idx],
                                              nalsvm.labels_pickle_files[alternate_label_idx]))
            for forward_date_idx, forward_date in enumerate(symbolForwardDates):
def test_shape(self):
    self.assertTupleEqual(
        pairwise_mk.homogeneous_polynomial_kernel(self.Xte, self.Xtr).numpy().shape,
        (self.Xte.size()[0], self.Xtr.size()[0]))
    self.assertTupleEqual(
        pairwise_mk.homogeneous_polynomial_kernel(self.Xtr).numpy().shape,
        (self.Xtr.size()[0], self.Xtr.size()[0]))
def fitting_function_mkl(key):
    print('For key: ', key, '############')
    labels_file_path = os.path.join(
        symbolData.symbol_specific_label_path(label_idx), key + ".csv")
    print(os.path.isfile(labels_file_path))
    output_dict = defaultdict(dict)
    if os.path.isfile(labels_file_path):  # check that this is a real path
        print(" reading labels")  # this is the labels path!
        labels = pd.read_csv(labels_file_path)
        label_name = str(
            labels.columns[labels.columns.str.contains(pat='label')].values[0])
        logmemoryusage("Before garbage collect")
        hmm_features = nfu.hmm_features_df(
            open_pickle_filepath(symbol_feature_paths[key]))
        if hmm_features.isnull().values.all():
            # the HMM features are all null, so skip this key
            print('lots of NaNs on features')
            pass
        else:  # if the features are not null, move on!
            print("can train")
            market_features_df = CreateMarketFeatures(
                CreateMarketFeatures(
                    CreateMarketFeatures(df=CreateMarketFeatures(
                        df=labels).ma_spread_duration()).ma_spread()).
                chaikin_mf()).obv_calc()  # market features dataframe
            df_concat = pd.DataFrame(
                pd.concat([hmm_features, market_features_df],
                          axis=1,
                          sort=False).dropna())
            df = df_concat[df_concat[label_name].notna()]
            df_final = df.drop(columns=[
                'TradedPrice', 'Duration', 'TradedTime',
                'ReturnTradedPrice', 'Volume', label_name
            ])
            y_train = df.reindex(
                columns=df.columns[df.columns.str.contains(pat='label')])  # training labels
            print('go to the labels')
            if df_final.shape[0] < 10:
                print(' the ratio of classes is too low. try another label permutation')
                # problem_dict[hmm_date][key] = str(key)
                pass
            else:
                print("starting model fit")
                Xtr, Xte, Ytr, Yte = train_test_split(
                    df_final, y_train, test_size=.2, random_state=42)
                # training
                arrXtr = np.array(Xtr)
                X_tr = normalization(rescale_01(arrXtr))
                Y_tr = torch.Tensor(Ytr.values.ravel())
                # testing
                arrXte = np.array(Xte)
                X_te = normalization(rescale_01(arrXte))
                Y_te = torch.Tensor(Yte.values.ravel())
                KLtr = [
                    pairwise.homogeneous_polynomial_kernel(X_tr, degree=d)
                    for d in range(1, 11)
                ] + [identity_kernel(len(Y_tr))]
                KLte = [
                    pairwise.homogeneous_polynomial_kernel(X_te, X_tr, degree=d)
                    for d in range(1, 11)
                ]
                KLte.append(torch.zeros(KLte[0].size()))
                print('done with kernel')
                try:
                    lam_values = [0.1, 0.2, 1]
                    best_results = {}
                    C_range = [0.1, 1]
                    for C_ch in C_range:
                        base_learner = SVC(C=C_ch)  # "soft"-margin SVM
                        print(' fitted the base learner')
                        # possible lambda values for the EasyMKL algorithm
                        for lam in lam_values:
                            print('now here', lam)
                            print(' and tuning lambda for EasyMKL...', end='')
                            base_learner = SVC(C=C_ch)  # "soft"-margin SVM
                            # MKLpy.model_selection.cross_val_score performs the cross
                            # validation automatically; it may return accuracy, AUC, or F1 scores
                            scores = cross_val_score(
                                KLtr, Y_tr,
                                EasyMKL(learner=base_learner, lam=lam),
                                n_folds=5, scoring='accuracy')
                            acc = np.mean(scores)
                            if not best_results or best_results['score'] < acc:
                                best_results = {'lam': lam, 'score': acc}
                            # evaluation on the test set
                            print('done', best_results)
                            cv_dict_list[(symbol, hmm_date, label_idx)][(lam, C_ch)] = [
                                scores, best_results
                            ]
                            print(cv_dict_list)
                    pickle_out_filename = os.path.join(
                        mainPath, "ExperimentCommonLocs/MKLFittedModels",
                        "_".join((symbol, 'model_fit_date', str(key),
                                  str(alternate_labels_nos[label_idx]),
                                  'MultiKernelSVC.pkl')))
                    print(pickle_out_filename)
                    pickle_out = open(pickle_out_filename, 'wb')
                    pickle.dump(cv_dict_list, pickle_out)
                    pickle_out.close()
                except (ValueError, TypeError, EOFError):
                    pass
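# Not part of the original function: a minimal sketch of how the best (lam, C)
# pair found by the CV loop above might be used to fit a final model and score
# the held-out kernels. best_lam and best_C are hypothetical stand-ins for the
# values selected by the search; variable names follow the function body.
from sklearn.metrics import accuracy_score
best_lam, best_C = 0.1, 1  # stand-ins, not values from an actual run
final_clf = EasyMKL(learner=SVC(C=best_C), lam=best_lam).fit(KLtr, Y_tr)
y_pred = final_clf.predict(KLte)
print('held-out accuracy: %.4f' % accuracy_score(Y_te, y_pred))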
# preprocess data
print('preprocessing data...', end='')
from MKLpy.preprocessing import normalization, rescale_01
X = rescale_01(X)     # feature scaling in [0,1]
X = normalization(X)  # ||X_i||_2^2 = 1

# train/test split
from sklearn.model_selection import train_test_split
Xtr, Xte, Ytr, Yte = train_test_split(X, Y, test_size=.25, random_state=42)
print('done')

# compute homogeneous polynomial kernels with degrees 0,1,2,...,10.
print('computing Homogeneous Polynomial Kernels...', end='')
from MKLpy.metrics import pairwise
KLtr = [
    pairwise.homogeneous_polynomial_kernel(Xtr, degree=d)
    for d in range(11)
]
KLte = [
    pairwise.homogeneous_polynomial_kernel(Xte, Xtr, degree=d)
    for d in range(11)
]
print('done')

# MKL algorithms
from MKLpy.algorithms import AverageMKL, EasyMKL, KOMD
# KOMD is not an MKL algorithm but a simple kernel machine, like the SVM
print('training AverageMKL...', end='')
clf = AverageMKL().fit(KLtr, Ytr)  # a wrapper for averaging kernels
print('done')
K_average = clf.solution.ker_matrix  # the combined kernel matrix
print('training EasyMKL...', end='')
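# Not part of the original snippet, which stops at the last print statement: a
# minimal sketch of the EasyMKL training step that print announces (lam=0.1 is
# an assumed value, mirroring the other examples in this collection).
clf = EasyMKL(lam=0.1).fit(KLtr, Ytr)  # combine kernels with the EasyMKL algorithm
print('done')
print(clf.weights)  # the weights assigned to each base kernel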
def setUp(self):
    super().setUp()
    self.KLtr = [
        pairwise_mk.homogeneous_polynomial_kernel(self.Xtr, degree=d)
        for d in range(1, 6)
    ]
    self.KLte = [
        pairwise_mk.homogeneous_polynomial_kernel(self.Xte, self.Xtr, degree=d)
        for d in range(1, 6)
    ]
                                                  test_size=0.3, random_state=42)
X_tr_S, X_te_S, y_tr_S, y_te_S = train_test_split(X5, y5, test_size=0.3, random_state=42)
X_tr_D, X_te_D, y_tr_D, y_te_D = train_test_split(X6, y6, test_size=0.3, random_state=42)

# apply the polynomial kernel
from MKLpy.metrics import pairwise
k1 = [
    pairwise.homogeneous_polynomial_kernel(X_tr_A, degree=d)
    for d in range(5)
]
k11 = [
    pairwise.homogeneous_polynomial_kernel(X_te_A, X_tr_A, degree=d)
    for d in range(5)
]
k2 = [
    pairwise.homogeneous_polynomial_kernel(X_tr_F, degree=d)
    for d in range(5)
]
k22 = [
    pairwise.homogeneous_polynomial_kernel(X_te_F, X_tr_F, degree=d)
    for d in range(5)
]
k3 = [
    pairwise.homogeneous_polynomial_kernel(X_tr_AV, degree=d)
    for d in range(5)
]
import numpy as np

ds = load_iris()
X, Y = ds.data, ds.target
classes = np.unique(Y)
print('done [%d classes]' % len(classes))

'''
WARNING: be sure that your matrix is not sparse!

EXAMPLE:
from sklearn.datasets import load_svmlight_file
X,Y = load_svmlight_file(...)
X = X.toarray()
'''

# compute homogeneous polynomial kernels with degrees 1, 2, 3.
print('computing Homogeneous Polynomial Kernels...', end='')
from MKLpy.metrics import pairwise
KL = [pairwise.homogeneous_polynomial_kernel(X, degree=d) for d in range(1, 4)]
print('done')

# MKL algorithms
from MKLpy.algorithms import EasyMKL
print('training EasyMKL...', end='')
clf = EasyMKL(lam=0.1, multiclass_strategy='ovo').fit(KL, Y)  # combine kernels with the EasyMKL algorithm
# multiclass_strategy should be 'ovo' for the one-vs-one decomposition strategy,
# and 'ova' for the one-vs-all/rest strategy
print('done')
print(clf.weights)
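# Not part of the original snippet: a minimal sketch of prediction with the
# multiclass EasyMKL model above. KL was computed on the full dataset, so this
# measures training accuracy only; it is an illustration, not a proper evaluation.
from sklearn.metrics import accuracy_score
y_pred = clf.predict(KL)
print('training accuracy: %.4f' % accuracy_score(Y, y_pred))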