def test_identity(self):
    """With include_identity=True, the multiview generator appends an
    identity kernel to the train list and a zero matrix to the test list."""
    gen_train = Multiview_generator(
        self.XLtr, kernel=self.kf, include_identity=True)
    gen_test = Multiview_generator(
        self.XLte, self.XLtr, kernel=self.kf, include_identity=True)
    eye = misc.identity_kernel(len(self.Xtr))
    zeros = torch.zeros(len(self.Xte), len(self.Xtr))
    self._check_lists(gen_train, self.KLtr + [eye])
    self._check_lists(gen_test, self.KLte + [zeros])
def test_identity(self):
    """With include_identity=True, the lambda generator appends an identity
    kernel to the train list and a zero matrix to the test list."""
    gen_train = Lambda_generator(
        self.Xtr, kernels=self.funcs, include_identity=True)
    gen_test = Lambda_generator(
        self.Xte, self.Xtr, kernels=self.funcs, include_identity=True)
    eye = misc.identity_kernel(len(self.Xtr))
    zeros = torch.zeros(len(self.Xte), len(self.Xtr))
    self._check_lists(gen_train, self.KLtr + [eye])
    self._check_lists(gen_test, self.KLte + [zeros])
def test_spectral_ratio(self):
    """Spectral ratio: raises on a non-squared matrix; equals sqrt(n) for
    an n x n identity and 1 for any constant (rank-one) kernel; the
    normalized variant maps these extremes to 1 and 0 respectively."""
    self.assertRaises(
        SquaredKernelError, metrics.spectral_ratio, self.X, self.Y)
    # Identity kernels: raw ratio is sqrt(n); normalized ratio is 1.
    for n in (5, 9):
        eye = misc.identity_kernel(n)
        self.assertEqual(metrics.spectral_ratio(eye, norm=False), n ** .5)
        self.assertEqual(metrics.spectral_ratio(eye, norm=True), 1)
    # Constant kernels (any positive scale): raw ratio 1, normalized 0.
    for scale in (1, 4):
        flat = np.ones((5, 5)) * scale
        self.assertEqual(metrics.spectral_ratio(flat, norm=False), 1)
        self.assertEqual(metrics.spectral_ratio(flat, norm=True), 0)
def test_identity(self):
    """With include_identity=True, the RBF generator appends an identity
    kernel (train) / zero matrix (test), with and without caching."""
    gen_train = RBF_generator(
        self.Xtr, gamma=self.gammavals, include_identity=True)
    gen_test_cached = RBF_generator(
        self.Xte, self.Xtr, gamma=self.gammavals,
        include_identity=True, cache=True)
    gen_test_uncached = RBF_generator(
        self.Xte, self.Xtr, gamma=self.gammavals,
        include_identity=True, cache=False)
    eye = misc.identity_kernel(len(self.Xtr))
    zeros = torch.zeros(len(self.Xte), len(self.Xtr))
    self._check_lists(gen_train, self.KLtr + [eye])
    self._check_lists(gen_test_cached, self.KLte + [zeros])
    self._check_lists(gen_test_uncached, self.KLte + [zeros])
def test_identity(self):
    """With include_identity=True, the HPK generator appends an identity
    kernel (train) / zero matrix (test), with and without caching."""
    degrees = range(1, 6)
    gen_train = HPK_generator(
        self.Xtr, degrees=degrees, include_identity=True)
    gen_test_cached = HPK_generator(
        self.Xte, self.Xtr, degrees=degrees,
        include_identity=True, cache=True)
    gen_test_uncached = HPK_generator(
        self.Xte, self.Xtr, degrees=degrees,
        include_identity=True, cache=False)
    eye = misc.identity_kernel(len(self.Xtr))
    zeros = torch.zeros(len(self.Xte), len(self.Xtr))
    self._check_lists(gen_train, self.KLtr + [eye])
    self._check_lists(gen_test_cached, self.KLte + [zeros])
    self._check_lists(gen_test_uncached, self.KLte + [zeros])
def setUp(self):
    """Build fixtures: normalized breast-cancer train/test splits, explicit
    HPK lists (degrees 5..10) padded with an identity kernel (train) /
    zero matrix (test), and the matching HPK generators."""
    dataset = load_breast_cancer()
    self.Xtr, self.Xte, self.Ytr, self.Yte = train_test_split(
        dataset.data, dataset.target, shuffle=True, train_size=50)
    self.Xtr = preprocessing.normalization(self.Xtr)
    self.Xte = preprocessing.normalization(self.Xte)
    degrees = range(5, 11)
    # Expected kernel lists, built explicitly for comparison against the
    # generators below (which are configured with include_identity=True).
    self.KLtr = [
        pairwise_mk.homogeneous_polynomial_kernel(self.Xtr, degree=d)
        for d in degrees
    ] + [misc.identity_kernel(len(self.Xtr))]
    self.KLte = [
        pairwise_mk.homogeneous_polynomial_kernel(self.Xte, self.Xtr, degree=d)
        for d in degrees
    ] + [torch.zeros(len(self.Xte), len(self.Xtr))]
    self.KLtr_g = HPK_generator(
        self.Xtr, degrees=degrees, include_identity=True)
    self.KLte_g = HPK_generator(
        self.Xte, self.Xtr, degrees=degrees, include_identity=True)
# Normalize samples so kernels of different degrees are comparable.
from MKLpy.preprocessing import normalization
X = normalization(X)

# Hold out half the data for evaluation (fixed seed for reproducibility).
from sklearn.model_selection import train_test_split
Xtr,Xte,Ytr,Yte = train_test_split(X,Y, test_size=.5, random_state=42)

from MKLpy.metrics import pairwise
from MKLpy.utils.misc import identity_kernel
import torch

# Making 10 homogeneous polynomial kernels (degrees 1..10).
# Adding the identity kernel makes the GRAM initial solution easily
# separable; if the initial solution is not separable, GRAM may not work
# well. The test list gets a matching zero matrix so both lists have the
# same length (the identity has no meaning on train-vs-test pairs).
KLtr = [pairwise.homogeneous_polynomial_kernel(Xtr, degree=d) for d in range(1,11)] + [identity_kernel(len(Ytr))]
KLte = [pairwise.homogeneous_polynomial_kernel(Xte,Xtr, degree=d) for d in range(1,11)]
KLte.append(torch.zeros(KLte[0].size()))

from MKLpy.algorithms import GRAM
from MKLpy.scheduler import ReduceOnWorsening
from MKLpy.callbacks import EarlyStopping

# Stop training when test ROC AUC stops improving for `patience` steps.
earlystop = EarlyStopping( KLte, Yte, patience=100, cooldown=1, metric='roc_auc', )
def test_alignment_ID(self):
    """Alignment with the identity is below 1 for a generic kernel, 1 for
    the identity itself, and raises on a non-squared matrix."""
    self.assertLess(metrics.alignment_ID(self.K1), 1)
    eye = misc.identity_kernel(self.K1.shape[0])
    self.assertAlmostEqual(metrics.alignment_ID(eye), 1)
    self.assertRaises(SquaredKernelError, metrics.alignment_ID, self.X)
def fitting_function_mkl(key):
    """Fit and cross-validate an EasyMKL multi-kernel SVC for one date ``key``.

    Loads the labels CSV for ``key``, joins HMM features with market
    features, builds homogeneous polynomial kernels (degrees 1..10, plus an
    identity kernel on the training side), grid-searches (lam, C) via
    cross-validation, and pickles the accumulated ``cv_dict_list``.

    Relies on module-level names: symbolData, label_idx,
    symbol_feature_paths, cv_dict_list, symbol, hmm_date, mainPath,
    alternate_labels_nos, plus the imported helpers.
    NOTE(review): consider passing these explicitly instead of via globals.
    """
    print('For key: ', key, '############')
    labels_file_path = os.path.join(
        symbolData.symbol_specific_label_path(label_idx), key + ".csv")
    print(os.path.isfile(labels_file_path))
    if not os.path.isfile(labels_file_path):
        # No labels for this key: nothing to fit.
        return

    print(" reading labels")  # this is the labels path!
    labels = pd.read_csv(labels_file_path)
    label_name = str(
        labels.columns[labels.columns.str.contains(pat='label')].values[0])
    logmemoryusage("Before garbage collect")
    hmm_features = nfu.hmm_features_df(
        open_pickle_filepath(symbol_feature_paths[key]))

    if hmm_features.isnull().values.all():
        # HMM features are entirely NaN: skip this key.
        print('lots of NaNs on features')
        return

    print("can train")
    # Market features dataframe: chained feature constructors.
    market_features_df = CreateMarketFeatures(
        CreateMarketFeatures(
            CreateMarketFeatures(df=CreateMarketFeatures(
                df=labels).ma_spread_duration()).ma_spread()).
        chaikin_mf()).obv_calc()

    # BUG FIX: sort must be the boolean False, not the string 'False'
    # (any non-empty string is truthy, so 'False' meant sort=True).
    df_concat = pd.DataFrame(
        pd.concat([hmm_features, market_features_df], axis=1,
                  sort=False).dropna())
    df = df_concat[df_concat[label_name].notna()]
    df_final = df.drop(columns=[
        'TradedPrice', 'Duration', 'TradedTime', 'ReturnTradedPrice',
        'Volume', label_name
    ])
    # Training labels: every column whose name contains 'label'.
    y_train = df.reindex(
        columns=df.columns[df.columns.str.contains(pat='label')])
    print('go to the labels')

    if df_final.shape[0] < 10:
        # Too few usable rows to split and fit reliably.
        # BUG FIX: the original string literal was broken across a raw
        # newline, which is a syntax error in a single-quoted string.
        print(' the ratio of classes is too low. '
              'try another label permutation')
        return

    print("starting model fit")
    Xtr, Xte, Ytr, Yte = train_test_split(
        df_final, y_train, test_size=.2, random_state=42)
    # Training arrays: rescale to [0, 1] then normalize rows.
    X_tr = normalization(rescale_01(np.array(Xtr)))
    Y_tr = torch.Tensor(Ytr.values.ravel())
    # Testing arrays (same preprocessing as training).
    X_te = normalization(rescale_01(np.array(Xte)))

    # Homogeneous polynomial kernels, degrees 1..10; an identity kernel is
    # appended on the training side, with a matching zero matrix on test.
    KLtr = [
        pairwise.homogeneous_polynomial_kernel(X_tr, degree=d)
        for d in range(1, 11)
    ] + [identity_kernel(len(Y_tr))]
    KLte = [
        pairwise.homogeneous_polynomial_kernel(X_te, X_tr, degree=d)
        for d in range(1, 11)
    ]
    KLte.append(torch.zeros(KLte[0].size()))
    print('done with kernel')

    try:
        lam_values = [0.1, 0.2, 1]  # EasyMKL regularization candidates
        C_range = [0.1, 1]          # SVC soft-margin candidates
        best_results = {}
        for C_ch in C_range:
            for lam in lam_values:
                print('now here', lam)
                print(' and tuning lambda for EasyMKL...', end='')
                base_learner = SVC(C=C_ch)  # "soft"-margin svm
                # cross_val_score performs the cross validation
                # automatically; it may return accuracy, auc, or F1 scores.
                scores = cross_val_score(
                    KLtr, Y_tr, EasyMKL(learner=base_learner, lam=lam),
                    n_folds=5, scoring='accuracy')
                acc = np.mean(scores)
                if not best_results or best_results['score'] < acc:
                    best_results = {'lam': lam, 'score': acc}
                print('done', best_results)
                cv_dict_list[(symbol, hmm_date, label_idx)][(lam, C_ch)] = [
                    scores, best_results]
                print(cv_dict_list)

        # Persist results once, after the full grid search; `with` ensures
        # the file handle is closed even on a write error.
        pickle_out_filename = os.path.join(
            mainPath, "ExperimentCommonLocs/MKLFittedModels",
            "_".join((symbol, 'model_fit_date', str(key),
                      str(alternate_labels_nos[label_idx]),
                      'MultiKernelSVC.pkl')))
        print(pickle_out_filename)
        with open(pickle_out_filename, 'wb') as pickle_out:
            pickle.dump(cv_dict_list, pickle_out)
    except (ValueError, TypeError, EOFError) as err:
        # Best-effort over many keys: keep going, but record the failure
        # instead of swallowing it silently.
        print('model fit failed for key', key, ':', err)