Python HSICLasso.regression примеры использования

Язык программирования: Python

Пространство имен/Пакет: pyHSICLasso

Класс/Тип: HSICLasso

Метод/Функция: regression

Примеров на hotexamples.com: 12

Python HSICLasso.regression - 12 примеров найдено. Это лучшие примеры Python кода для pyHSICLasso.HSICLasso.regression, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

input(21)

HSICLasso(18)

regression(12)

classification(11)

get_index(8)

dump(5)

get_features(5)

plot_path(3)

get_index_score(3)

get_index_neighbors_score(2)

linkage(2)

plot_heatmap(2)

get_index_neighbors(2)

Y_in(1)

plot_dendrogram(1)

_input_data_list(1)

_check_args(1)

_input_data_ndarray(1)

X_in(1)

featname(1)

_check_shape(1)

save_param(1)

Пример #1

Показать файл

def main():
    """Run HSIC Lasso regression on the multi-output CSV sample.

    Reads ``csv_data_mv.csv`` with the ``output1`` and ``output2`` columns
    passed as ``output_list``, asks ``regression`` for 5 features, then
    calls ``dump()`` and ``plot_path()`` on the fitted object.
    """
    targets = ['output1', 'output2']

    model = HSICLasso()
    model.input("../tests/test_data/csv_data_mv.csv", output_list=targets)
    model.regression(5)

    # Report the outcome: textual dump first, then the path plot.
    model.dump()
    model.plot_path()

Пример #2

Показать файл

def main():
    """Run the same regression twice with different ``n_jobs`` settings.

    The MATLAB sample file is loaded once; ``regression(5, ...)`` is then
    invoked first with ``n_jobs=1`` and afterwards with ``n_jobs=-1``.
    """
    model = HSICLasso()
    model.input("../tests/test_data/matlab_data.mat")

    # 1  -> single core processing
    # -1 -> multi-core processing, use all available cores (default)
    for workers in (1, -1):
        model.regression(5, n_jobs=workers)

Пример #3

Показать файл

def hsic(num_features, hsic_data, method='regression'):
    """Select features with HSIC Lasso and return ``get_features()``.

    Args:
        num_features: Value forwarded to ``regression``/``classification``.
        hsic_data: Whatever ``HSICLasso.input`` accepts as its sole argument.
        method: ``'regression'`` runs ``regression``; any other value runs
            ``classification``.

    Returns:
        The result of ``HSICLasso.get_features()`` after fitting.
    """
    selector = HSICLasso()
    selector.input(hsic_data)

    # Pick the solver once, then invoke it with the requested feature count.
    if method == 'regression':
        solve = selector.regression
    else:
        solve = selector.classification
    solve(num_features)

    return selector.get_features()

Пример #4

Показать файл

def hsic_sel(csv, no_features, method='classification'):
    """Fit HSIC Lasso on ``csv`` and return the selected features.

    Args:
        csv: Argument handed unchanged to ``HSICLasso.input``.
        no_features: Value forwarded to the chosen solver.
        method: ``'regression'`` selects ``regression``; every other value
            (including the default) selects ``classification``.

    Returns:
        The result of ``HSICLasso.get_features()``.
    """
    selector = HSICLasso()
    selector.input(csv)

    # Anything that is not exactly 'regression' falls back to classification.
    solver_name = 'regression' if method == 'regression' else 'classification'
    getattr(selector, solver_name)(no_features)

    return selector.get_features()

Пример #5

Показать файл

Файл: sample_heatmap.py Проект: 15754311016/SignedLasso

def main():
    """Fit HSIC Lasso on the MATLAB sample and draw a clustered heatmap."""
    model = HSICLasso()
    model.input("../tests/test_data/matlab_data.mat")

    # max_neighbors=0: only the HSIC Lasso features themselves are used
    # when plotting the heatmap.
    model.regression(5, max_neighbors=0)

    # The linkage is computed first; the heatmap call follows it.
    model.linkage()

    # Hierarchical clustering:
    #   - features are clustered by using HSIC scores
    #   - samples are clustered by using Euclidean distance
    model.plot_heatmap()

Пример #6

Показать файл

def main():
    """Feed NumPy arrays (instead of a file path) into HSIC Lasso.

    ``X`` is the transposed ``'X'`` entry of the MATLAB file and ``Y`` is the
    first row of its ``'Y'`` entry. Feature names ``Feat1 .. FeatN`` are
    generated from the number of columns of ``X``.
    """
    model = HSICLasso()

    # Numpy array input example
    mat = sio.loadmat("../tests/test_data/matlab_data.mat")
    X = mat['X'].transpose()
    Y = mat['Y'][0]
    n_features = X.shape[1]
    featname = ['Feat%d' % (k + 1) for k in range(n_features)]

    model.input(X, Y, featname=featname)
    model.regression(5)
    model.dump()
    model.plot_path()

    # Persist the parameters of the fitted model.
    model.save_param()

Пример #7

Показать файл

def main():
    """Run block HSIC Lasso on ``user_data_new.csv`` and save the chosen rows.

    Ten columns are passed as ``output_list``; ``regression(100, B=50)`` is
    run, the selected indices, their scores and the feature names are
    printed, and the matching rows of ``X_in`` are written to
    ``X_select.txt``.
    """
    # NOTE(review): 'c5,' contains a trailing comma inside the string. It is
    # kept exactly as in the original; confirm against the CSV header whether
    # the column is really named 'c5,' or whether 'c5' was intended.
    # (The original also carried a disabled extension of this list up to
    # 'c30' and a generator expression for 'c1'..'c50'.)
    target_columns = [
        'c1', 'c2', 'c3', 'c4', 'c5,', 'c6', 'c7', 'c8', 'c9',
        'c10'
    ]

    model = HSICLasso()
    model.input("./user_data_new.csv", output_list=target_columns)
    model.regression(100, B=50)
    model.dump()

    chosen = model.get_index()
    print(chosen)
    print(model.get_index_score())
    # plot_path() was disabled in the original and stays disabled here.
    print(model.get_features())

    # Rows of X_in are picked by the selected indices before saving.
    chosen_rows = model.X_in[chosen, :]
    np.savetxt('X_select.txt', chosen_rows, fmt=str('%.5f'), encoding='utf-8')

Пример #8

Показать файл

Файл: FeatureSelection.py Проект: vd1371/XProject

    def HSICLasso(self):
        """Rank features with HSIC Lasso and report their importance.

        Fits ``HSICLasso`` on ``self.X_train`` / ``self.Y_train`` using
        ``classification`` or ``regression`` depending on ``self.type``,
        maps the returned feature identifiers back to column names of
        ``self.data`` and hands a ``{column name: score}`` mapping to
        ``self.report_feature_importance``.

        Note: inside this method the bare name ``HSICLasso`` resolves to the
        module-level class, not to this method.
        """
        # Work on a copy so renaming the last column to 'class' does not
        # touch self.data.
        df_ = self.data.copy()
        cols = list(df_.columns)[:-1] + ['class']
        df_.columns = cols

        hsic_lasso = HSICLasso()
        hsic_lasso.input(self.X_train.values, self.Y_train.values)

        if self.type == CLASSIFICATION:
            hsic_lasso.classification(self.num_top_features)
        elif self.type == REGRESSION:
            hsic_lasso.regression(self.num_top_features)

        # get_features() values are converted with int(val) - 1, i.e. they
        # are treated as 1-based positions into the columns of df_.
        # NOTE(review): this assumes X_train's columns are in the same order
        # as self.data's columns — confirm against the caller.
        feats = [
            df_.columns[int(val) - 1] for val in hsic_lasso.get_features()
        ]

        # The original wrote into `features_` without ever creating it,
        # which raises NameError; build the mapping locally instead.
        features_ = {}
        for feat, imp in zip(feats, hsic_lasso.get_index_score()):
            features_[feat] = imp
        self.report_feature_importance(features_,
                                       self.num_top_features,
                                       label="HSICLasso")

Пример #9

Показать файл

    def hsic_lasso_matric(self,
                          data,
                          n_jobs=2,
                          n_sample=False,
                          frac_sample=False):
        '''Build a square matrix of HSIC Lasso scores between all columns of `data`.

        Each column in turn is used as the target `y` while all remaining
        columns form `X`; HSIC Lasso regression is run with
        `num_feat=X.shape[1]` and the resulting scores become one row of the
        matrix. Because the result is not symmetric it has to be read
        row-wise: row 0 holds the scores of the other variables with respect
        to variable 0, row 1 those with respect to variable 1, and so on.
        Variables for which HSIC Lasso returned no score get 0.0, and the
        diagonal (a variable against itself) is filled with 1.0.

        Rows containing NaN are dropped before anything else happens.

        Parameters
        ----------
        data : array-like or pandas.DataFrame
            Table holding all explanatory and objective variables as columns.
        n_jobs : int
            Forwarded unchanged to `HSICLasso.regression(n_jobs=...)`.
            NOTE(review): the previous docstring said "-1 for GPU"; that is
            not visible in this code — confirm against the pyHSICLasso docs.
        n_sample : int or False
            If truthy, that many rows are drawn at random (with replacement).
        frac_sample : float in [0, 1] or False
            If truthy, that fraction of rows is drawn at random (with
            replacement). Must not be combined with `n_sample`.

        Returns
        -------
        numpy.ndarray
            The score matrix, also stored on `self.hsic_lasso`.

        Raises
        ------
        ValueError
            If both `n_sample` and `frac_sample` are given.
        '''
        # Copy first so the caller's object is not affected by later steps.
        data = copy(data)
        data = pd.DataFrame(data).dropna()
        # Optional random sub-sampling; the four branches cover every
        # combination of n_sample / frac_sample being set or not.
        if not n_sample:
            if not frac_sample:
                # neither given: use all rows
                pass
            else:
                # only frac_sample given: sample a fraction of the rows
                data = data.sample(frac=frac_sample, replace=True)
        else:

            if not frac_sample:
                # only n_sample given: sample a fixed number of rows
                data = data.sample(n=n_sample, replace=True)
            else:
                # both given: ambiguous request, refuse
                raise ValueError(
                    'Please enter a value for `frac` OR `n`, not both')

        data = check_array(data, accept_sparse="csc",
                           dtype=float)  # Convert to numpy.ndarray
        n_col = data.shape[1]
        # Accumulator with zero rows; one row of n_col - 1 scores is appended
        # per target column.
        hsic_array = np.empty((0, n_col - 1), float)
        for i in range(n_col):
            # Column i is the target, every other column is an input.
            X = np.delete(data, obj=i, axis=1)
            y = data[:, i]

            # Run HSIC Lasso asking for as many features as X has columns
            hsic_lasso = HSICLasso()
            hsic_lasso.input(X, y)
            hsic_lasso.regression(num_feat=X.shape[1],
                                  discrete_x=False,
                                  n_jobs=n_jobs)
            # Results come back ordered by score (per the original author),
            # so pair index and score to be able to re-sort by index below.
            hsic_ = np.array(
                [hsic_lasso.get_index(),
                 hsic_lasso.get_index_score()])
            hsic_ = hsic_.T  # one (index, score) pair per row
            # Indices that HSIC Lasso did not return get an explicit 0.0 score
            # so that every row ends up with X.shape[1] entries.
            lack_set = set([x for x in range(X.shape[1])]) - set(hsic_[:, 0])
            for lack in lack_set:
                lack_list = np.array([[lack, 0.0]])
                hsic_ = np.append(hsic_, lack_list, axis=0)
            hsic_ = hsic_[np.argsort(hsic_[:, 0])]  # Sort by index
            hsic_array = np.append(hsic_array,
                                   hsic_[:, 1].reshape(1, -1),
                                   axis=0)
        # The rows do not contain the score of a variable with itself; insert
        # 1.0 at each diagonal position. np.insert without an axis flattens
        # the array, and (n_row + 1) * i is the flat offset of element (i, i)
        # in the final n_row x n_row matrix.
        n_row = hsic_array.shape[0]
        for i in range(n_row):
            insert_i = (n_row + 1) * i
            hsic_array = np.insert(hsic_array, insert_i, 1.0)
        self.hsic_lasso = hsic_array.reshape(n_row, -1)
        return self.hsic_lasso

Пример #10

Показать файл

Файл: sample.py Проект: pgsrv/pyHSICLasso

def main():
    """Minimal HSIC Lasso regression run on the MATLAB sample file.

    Loads ``matlab_data.mat``, asks ``regression`` for 5 features and then
    calls ``dump()`` and ``plot_path()``.
    """
    sample_path = "../tests/test_data/matlab_data.mat"

    model = HSICLasso()
    model.input(sample_path)
    model.regression(5)

    # Report the outcome: textual dump first, then the path plot.
    model.dump()
    model.plot_path()

Пример #11

Показать файл

Файл: test_regression.py Проект: ukwksk/pyHSICLasso

class RegressionTest(unittest.TestCase):
    """Regression tests for ``HSICLasso.regression`` on the bundled sample data."""

    def setUp(self):
        # A fresh, un-fitted HSICLasso instance for every test method.
        self.hsic_lasso = HSICLasso()

    def test_regression(self):
        """Check the selected indices (``A``) for several regression setups.

        The expected index lists are fixed reference values; the NumPy RNG is
        seeded so that the runs are reproducible.
        """

        np.random.seed(0)

        # Calling regression() before any input() is expected to fail with
        # UnboundLocalError.
        with self.assertRaises(UnboundLocalError):
            self.hsic_lasso.regression()

        # Plain runs (single job) asking for 5 and then 10 features.
        self.hsic_lasso.input("./tests/test_data/matlab_data.mat")
        self.hsic_lasso.regression(5, n_jobs = 1)
        self.assertEqual(self.hsic_lasso.A, [1099, 99, 199, 1299, 299])

        self.hsic_lasso.input("./tests/test_data/matlab_data.mat")
        self.hsic_lasso.regression(10, n_jobs = 1)
        self.assertEqual(self.hsic_lasso.A, [1099, 99, 199, 1299, 1477,
                                             1405, 1073, 299,1596, 358])

        # Block variant: B is half of X_in.shape[1] (the number of samples,
        # per the warning text asserted at the end of this test).
        # NOTE(review): the third positional argument (10) is presumably the
        # number of permutations — confirm against the regression() signature.
        self.hsic_lasso.input("./tests/test_data/matlab_data.mat")
        B = int(self.hsic_lasso.X_in.shape[1]/2)
        self.hsic_lasso.regression(5, B, 10)
        self.assertEqual(self.hsic_lasso.A, [1099, 99, 199, 299, 1299])

        self.hsic_lasso.input("./tests/test_data/matlab_data.mat")
        B = int(self.hsic_lasso.X_in.shape[1]/2)
        self.hsic_lasso.regression(10, B, 10)
        self.assertEqual(self.hsic_lasso.A, [1099, 99, 199, 1477, 299,
                                             1299, 1073, 1405, 358, 1596])

        # Block size that does not divide the number of samples: exactly one
        # RuntimeWarning with a specific message is expected.
        with warnings.catch_warnings(record=True) as w:
        
            self.hsic_lasso.input("./tests/test_data/csv_data.csv")
            B = int(self.hsic_lasso.X_in.shape[1]/2) - 1
            n = self.hsic_lasso.X_in.shape[1]
            # True division on purpose: the message shows both the fractional
            # block count and its int() truncation.
            numblocks = n / B
            
            self.hsic_lasso.regression(10, B, 10)
            self.assertEqual(self.hsic_lasso.A, [1422, 248, 512, 1581, 1670,
                                                 764, 1771, 896, 779, 398])
            self.assertEqual(len(w), 1)
            self.assertEqual(w[-1].category, RuntimeWarning)
            # The backslash continues the string literal onto the next line,
            # which therefore has to start in column 0.
            self.assertEqual(str(w[-1].message), "B {} must be an exact divisor of the \
number of samples {}. Number of blocks {} will be approximated to {}.".format(B, n, numblocks, int(numblocks)))

Пример #12

Показать файл

def featureSelection(X, y, method='lasso', select=500):
    """Select features of ``X`` for predicting ``y`` and save their indices.

    The chosen column indices are written with ``np.savetxt`` to
    ``selected/<select>/X_<method>.dat`` (the directory must already exist)
    and the elapsed wall-clock time is printed.

    Args:
        X: Feature matrix; columns are the candidate features.
        y: Target values passed to the underlying estimator.
        method: One of ``'lasso'``, ``'rf'``, ``'svm'`` or ``'hsiclasso'``.
        select: Maximum number of features to keep.

    Returns:
        None. The result is only written to disk.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    supported = ('lasso', 'rf', 'svm', 'hsiclasso')
    if method not in supported:
        # Previously an unknown method fell through every branch and crashed
        # with UnboundLocalError at the save step.
        raise ValueError(
            "Unknown method %r; expected one of %s" % (method, supported))

    t0 = time.time()

    # Timings in the branch comments are the original author's measurements.

    # sparse (15 seconds)
    if method == 'lasso':
        from sklearn import linear_model

        # Hand-tuned regularisation strengths.
        # NOTE(review): presumably chosen so that roughly `select` features
        # survive for the author's data set — confirm before reuse.
        a = 0.861 if select == 500 else 0.0755
        lasso = linear_model.Lasso(alpha=a)
        lasso.fit(X, y)
        indices = np.where(lasso.coef_ != 0)
        # np.where returns a tuple of arrays, so compare the COUNT of
        # non-zero coefficients (the original compared the tuple itself to
        # an int, which raises TypeError on Python 3).
        if len(indices[0]) > select:
            # Rank by magnitude: sorting the signed values would prefer zero
            # coefficients over strongly negative ones.
            indices = np.argsort(-np.abs(lasso.coef_))[:select]

    # non-sparse (157 seconds)
    elif method == 'rf':
        from sklearn.ensemble import ExtraTreesRegressor
        from sklearn.feature_selection import SelectFromModel

        t = ExtraTreesRegressor(n_estimators=50)
        t.fit(X, y)
        model = SelectFromModel(t, prefit=True,
                                max_features=select)
        # get_support must be CALLED; passing the bound method itself to
        # np.where yields a meaningless single index.
        indices = np.where(model.get_support())

    # non-sparse (8.5 seconds)
    elif method == 'svm':
        from sklearn.svm import SVR
        from sklearn.feature_selection import SelectFromModel

        SVMReg = SVR(kernel='linear',
                     gamma='scale', C=1.0, epsilon=0.2)
        SVMReg.fit(X, y)
        model = SelectFromModel(SVMReg, prefit=True,
                                max_features=select)
        indices = np.where(model.get_support())

    # wrapper model (preset number of features) (1000 seconds / 5000 seconds)
    else:  # method == 'hsiclasso'
        from pyHSICLasso import HSICLasso

        hsic_lasso = HSICLasso()
        hsic_lasso.input(X, y)
        hsic_lasso.regression(select)
        indices = hsic_lasso.get_index()

    # Possible future additions (dimensionality reduction): PCA, MDS, PLS, DWT

    # Only the indices are persisted; the reduced matrix can be rebuilt from
    # them as X[:, indices].
    np.savetxt('selected/' + str(select) + '/X_' + method + '.dat', indices)

    print("--- %s seconds ---" % (time.time() - t0))