Example #1
	def test_identity(self):
		KLtr_g_i  = Multiview_generator(self.XLtr, kernel=self.kf, include_identity=True)
		KLte_g_i = Multiview_generator(self.XLte, self.XLtr, kernel=self.kf, include_identity=True)
		I = misc.identity_kernel(len(self.Xtr))
		Z = torch.zeros(len(self.Xte), len(self.Xtr))
		self._check_lists(KLtr_g_i, self.KLtr + [I])
		self._check_lists(KLte_g_i, self.KLte + [Z])
Example #2
	def test_identity(self):
		KLtr_g_i  = Lambda_generator(self.Xtr, kernels=self.funcs, include_identity=True)
		KLte_g_i = Lambda_generator(self.Xte, self.Xtr, kernels=self.funcs, include_identity=True)
		I = misc.identity_kernel(len(self.Xtr))
		Z = torch.zeros(len(self.Xte), len(self.Xtr))
		self._check_lists(KLtr_g_i, self.KLtr + [I])
		self._check_lists(KLte_g_i, self.KLte + [Z])
Example #3
    def test_spectral_ratio(self):
        self.assertRaises(SquaredKernelError, metrics.spectral_ratio, self.X, self.Y)
        self.assertEqual(metrics.spectral_ratio(misc.identity_kernel(5), norm=False), 5**.5)
        self.assertEqual(metrics.spectral_ratio(misc.identity_kernel(9), norm=False), 9**.5)
        self.assertEqual(metrics.spectral_ratio(np.ones((5, 5)), norm=False), 1)
        self.assertEqual(metrics.spectral_ratio(np.ones((5, 5)) * 4, norm=False), 1)
        self.assertEqual(metrics.spectral_ratio(misc.identity_kernel(5), norm=True), 1)
        self.assertEqual(metrics.spectral_ratio(misc.identity_kernel(9), norm=True), 1)
        self.assertEqual(metrics.spectral_ratio(np.ones((5, 5)), norm=True), 0)
        self.assertEqual(metrics.spectral_ratio(np.ones((5, 5)) * 4, norm=True), 0)
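A minimal sketch of the quantity these assertions exercise, assuming the usual definition used by MKLpy: the spectral ratio of a kernel matrix K is trace(K) / ||K||_F, and the normalized variant rescales it to [0, 1]. The helper spectral_ratio_sketch below is hypothetical (NumPy only), not the MKLpy implementation.

import numpy as np

def spectral_ratio_sketch(K, norm=False):
    # trace over Frobenius norm: sqrt(n) for the identity, 1 for a rank-1 all-ones matrix
    c = np.trace(K) / np.linalg.norm(K, 'fro')
    if norm:
        n = K.shape[0]
        c = (c - 1) / (n ** .5 - 1)
    return c

print(spectral_ratio_sketch(np.eye(5)))                   # ~2.236 == 5**.5
print(spectral_ratio_sketch(np.ones((5, 5))))             # 1.0
print(spectral_ratio_sketch(np.eye(5), norm=True))        # 1.0
print(spectral_ratio_sketch(np.ones((5, 5)), norm=True))  # 0.0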
Example #4
	def test_identity(self):
		KLtr_g_i  = RBF_generator(self.Xtr, gamma=self.gammavals, include_identity=True)
		KLte_g_i = RBF_generator(self.Xte, self.Xtr, gamma=self.gammavals, include_identity=True, cache=True)
		KLte_g_i_c = RBF_generator(self.Xte, self.Xtr, gamma=self.gammavals, include_identity=True, cache=False)
		I = misc.identity_kernel(len(self.Xtr))
		Z = torch.zeros(len(self.Xte), len(self.Xtr))
		self._check_lists(KLtr_g_i, self.KLtr + [I])
		self._check_lists(KLte_g_i, self.KLte + [Z])
		self._check_lists(KLte_g_i_c, self.KLte + [Z])
Example #5
	def test_identity(self):
		KLtr_g_i  = HPK_generator(self.Xtr, degrees=range(1,6), include_identity=True)
		KLte_g_i = HPK_generator(self.Xte, self.Xtr, degrees=range(1,6), include_identity=True, cache=True)
		KLte_g_i_c = HPK_generator(self.Xte, self.Xtr, degrees=range(1,6), include_identity=True, cache=False)
		I = misc.identity_kernel(len(self.Xtr))
		Z = torch.zeros(len(self.Xte), len(self.Xtr))
		self._check_lists(KLtr_g_i, self.KLtr + [I])
		self._check_lists(KLte_g_i, self.KLte + [Z])
		self._check_lists(KLte_g_i_c, self.KLte + [Z])
Example #6
    def setUp(self):
        data = load_breast_cancer()
        self.Xtr, self.Xte, self.Ytr, self.Yte = train_test_split(
            data.data, data.target, shuffle=True, train_size=50)
        self.Xtr = preprocessing.normalization(self.Xtr)
        self.Xte = preprocessing.normalization(self.Xte)
        self.KLtr = [
            pairwise_mk.homogeneous_polynomial_kernel(self.Xtr, degree=d)
            for d in range(5, 11)
        ] + [misc.identity_kernel(len(self.Xtr))]  #.Double()]
        self.KLte = [
            pairwise_mk.homogeneous_polynomial_kernel(self.Xte, self.Xtr, degree=d)
            for d in range(5, 11)
        ] + [torch.zeros(len(self.Xte), len(self.Xtr))]  #, dtype=torch.double)]
        self.KLtr_g = HPK_generator(self.Xtr, degrees=range(5, 11),
                                    include_identity=True)
        self.KLte_g = HPK_generator(self.Xte, self.Xtr, degrees=range(5, 11),
                                    include_identity=True)
Example #7
from MKLpy.preprocessing import normalization
X = normalization(X)


from sklearn.model_selection import train_test_split
Xtr,Xte,Ytr,Yte = train_test_split(X,Y, test_size=.5, random_state=42)


from MKLpy.metrics import pairwise
from MKLpy.utils.misc import identity_kernel
import torch

# Make 10 homogeneous polynomial kernels (degrees 1 to 10).
# Adding the identity kernel is suggested to keep the initial GRAM solution easily separable;
# if the initial solution is not separable, GRAM may not work well.
KLtr = [pairwise.homogeneous_polynomial_kernel(Xtr, degree=d) for d in range(1,11)] + [identity_kernel(len(Ytr))]
KLte = [pairwise.homogeneous_polynomial_kernel(Xte,Xtr, degree=d) for d in range(1,11)]
KLte.append(torch.zeros(KLte[0].size()))


from MKLpy.algorithms import GRAM
from MKLpy.scheduler import ReduceOnWorsening
from MKLpy.callbacks import EarlyStopping

earlystop = EarlyStopping(
	KLte, 
	Yte, 
	patience=100,
	cooldown=1, 
	metric='roc_auc',
)
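
The snippet above imports GRAM and ReduceOnWorsening but stops after defining the callback. Below is a minimal sketch of how these pieces are typically wired together, following the usual MKLpy pattern; max_iter and learning_rate are placeholder values, and the final AUC check is an assumed usage rather than part of the original example.

scheduler = ReduceOnWorsening()  # reduces the learning rate whenever the solution worsens

clf = GRAM(
	max_iter=1000,          # placeholder iteration budget
	learning_rate=.01,      # placeholder step size
	callbacks=[earlystop],
	scheduler=scheduler,
).fit(KLtr, Ytr)

from sklearn.metrics import roc_auc_score
y_score = clf.decision_function(KLte)  # KLte includes the zero matrix appended above
print('test AUC: %.4f' % roc_auc_score(Yte, y_score))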
Example #8
    def test_alignment_ID(self):
        self.assertLess(metrics.alignment_ID(self.K1), 1)
        self.assertAlmostEqual(
            metrics.alignment_ID(misc.identity_kernel(self.K1.shape[0])), 1)
        self.assertRaises(SquaredKernelError, metrics.alignment_ID, self.X)
Example #9
def fitting_function_mkl(key):
    print('For key: ', key, '############')
    labels_file_path = os.path.join(
        symbolData.symbol_specific_label_path(label_idx), key + ".csv")
    print(os.path.isfile(labels_file_path))
    output_dict = defaultdict(dict)

    if os.path.isfile(labels_file_path):  # check that this is a real path
        print(" reading labels")  # this is the labels path!
        labels = pd.read_csv(labels_file_path)
        label_name = str(
            labels.columns[labels.columns.str.contains(pat='label')].values[0])
        logmemoryusage("Before garbage collect")
        hmm_features = nfu.hmm_features_df(
            open_pickle_filepath(symbol_feature_paths[key]))

        if hmm_features.isnull().values.all():  # skip if the HMM features are all null
            print('lots of NaNs on features')
        else:  # if features not null then start moving on!
            print("can train")
            market_features_df = CreateMarketFeatures(
                CreateMarketFeatures(
                    CreateMarketFeatures(
                        df=CreateMarketFeatures(df=labels).ma_spread_duration()
                    ).ma_spread()
                ).chaikin_mf()
            ).obv_calc()  # market features dataframe

            df_concat = pd.DataFrame(
                pd.concat([hmm_features, market_features_df], axis=1,
                          sort=False).dropna())

            df = df_concat[df_concat[label_name].notna()]
            df_final = df.drop(columns=[
                'TradedPrice', 'Duration', 'TradedTime', 'ReturnTradedPrice',
                'Volume', label_name
            ])

            y_train = df.reindex(columns=df.columns[df.columns.str.contains(
                pat='label')])  # training labels
            print('go to the labels')

            if df_final.shape[0] < 10:
                print('the ratio of classes is too low; try another label permutation')
                # problem_dict[hmm_date][key] = str(key)
            else:
                print("starting model fit")

                Xtr, Xte, Ytr, Yte = train_test_split(df_final,
                                                      y_train,
                                                      test_size=.2,
                                                      random_state=42)
                # training
                arrXtr = np.array(Xtr)
                X_tr = normalization(rescale_01(arrXtr))
                Y_tr = torch.Tensor(Ytr.values.ravel())

                # testing

                arrXte = np.array(Xte)
                X_te = normalization(rescale_01(arrXte))
                Y_te = torch.Tensor(Yte.values.ravel())

                KLtr = [
                    pairwise.homogeneous_polynomial_kernel(X_tr, degree=d)
                    for d in range(1, 11)
                ] + [identity_kernel(len(Y_tr))]
                KLte = [
                    pairwise.homogeneous_polynomial_kernel(X_te,
                                                           X_tr,
                                                           degree=d)
                    for d in range(1, 11)
                ]
                KLte.append(torch.zeros(KLte[0].size()))
                print('done with kernel')
                try:
                    lam_values = [0.1, 0.2, 1]
                    best_results = {}
                    C_range = [0.1, 1]
                    for C_ch in C_range:
                        base_learner = SVC(C=C_ch)  # "soft"-margin svm
                        print('instantiated the base learner')
                        # possible lambda values for the EasyMKL algorithm
                        for lam in lam_values:
                            print('now here', lam)
                            print(' and tuning lambda for EasyMKL...', end='')
                            base_learner = SVC(C=C_ch)  # "soft"-margin svm
                            # MKLpy.model_selection.cross_val_score performs the cross-validation
                            # automatically; it can return accuracy, AUC, or F1 scores
                            scores = cross_val_score(KLtr,
                                                     Y_tr,
                                                     EasyMKL(
                                                         learner=base_learner,
                                                         lam=lam),
                                                     n_folds=5,
                                                     scoring='accuracy')
                            acc = np.mean(scores)
                            if not best_results or best_results['score'] < acc:
                                best_results = {'lam': lam, 'score': acc}
                            # evaluation on the test set

                            print('done', best_results)
                            cv_dict_list[(symbol, hmm_date,
                                          label_idx)][(lam, C_ch)] = [
                                              scores, best_results
                                          ]
                            print(cv_dict_list)

                            pickle_out_filename = os.path.join(
                                mainPath,
                                "ExperimentCommonLocs/MKLFittedModels",
                                "_".join((symbol, 'model_fit_date', str(key),
                                          str(alternate_labels_nos[label_idx]),
                                          'MultiKernelSVC.pkl')))
                            print(pickle_out_filename)

                            pickle_out = open(pickle_out_filename, 'wb')

                            pickle.dump(cv_dict_list, pickle_out)
                            pickle_out.close()

                except (ValueError, TypeError, EOFError):
                    pass
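
The cross-validation loop above selects lam but never uses the held-out kernels KLte and labels Y_te it prepared. A possible continuation is sketched below, reusing the names defined inside fitting_function_mkl; best_C is hypothetical, since the loop as written only records the best lam and score.

from sklearn.metrics import accuracy_score

best_lam = best_results['lam']   # chosen by the CV loop above
best_C = 1                       # hypothetical: the loop would also need to record the best C
final_clf = EasyMKL(lam=best_lam, learner=SVC(C=best_C)).fit(KLtr, Y_tr)
y_pred = final_clf.predict(KLte)               # uses the zero-padded test kernel list
print('held-out accuracy: %.4f' % accuracy_score(Y_te, y_pred))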