Example #1
import numpy as np
from sklearn.decomposition import KernelPCA


def train_data_format(df):

    y = df['type'].values

    df = df.drop(['id', 'type'], axis=1)

    # NB: 'degree' only applies to the 'poly' kernel; it is ignored for 'rbf'
    kpca = KernelPCA(n_components=3, kernel='rbf', degree=2, gamma=0.1)
    transf = kpca.fit_transform(
        df.drop(
            [
                'bl_lt_0.2',
                'bl_mt_0.7',
                # 'hl_lt_0.2',
                'hl_mt_0.7',
                # 'rf_lt_0.2',
                # 'rf_mt_0.8',
                # 'hs_mt_0.7',
                # 'hs_lt_0.2'
            ],
            axis=1).values)

    x = df.values
    x = np.hstack((x, transf))

    print(df.dtypes)

    return x, y
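
A minimal usage sketch for the function above, on hypothetical data (the required column names come from the snippet; 'extra_feat' is invented for illustration):

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
df = pd.DataFrame({
    'id': range(10),
    'type': rng.integers(0, 2, 10),
    'bl_lt_0.2': rng.random(10),
    'bl_mt_0.7': rng.random(10),
    'hl_mt_0.7': rng.random(10),
    'extra_feat': rng.random(10),
})
x, y = train_data_format(df)
print(x.shape)  # (10, 7): 4 original features plus 3 kernel components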
Example #2
 # Thin-wrapper __init__: store the constructor arguments and build the
 # wrapped estimator (SKLModel is the wrapped scikit-learn KernelPCA here)
 def __init__(self,
              n_components=None,
              kernel='linear',
              gamma=None,
              degree=3,
              coef0=1,
              kernel_params=None,
              alpha=1.0,
              fit_inverse_transform=False,
              eigen_solver='auto',
              tol=0,
              max_iter=None,
              remove_zero_eig=False,
              random_state=None,
              copy_X=True,
              n_jobs=None):
     self._hyperparams = {
         'n_components': n_components,
         'kernel': kernel,
         'gamma': gamma,
         'degree': degree,
         'coef0': coef0,
         'kernel_params': kernel_params,
         'alpha': alpha,
         'fit_inverse_transform': fit_inverse_transform,
         'eigen_solver': eigen_solver,
         'tol': tol,
         'max_iter': max_iter,
         'remove_zero_eig': remove_zero_eig,
         'random_state': random_state,
         'copy_X': copy_X,
         'n_jobs': n_jobs
     }
     self._wrapped_model = SKLModel(**self._hyperparams)
Example #3
 def fit(self, X, y=None):
     # recreate the wrapped estimator so repeated fit() calls start fresh
     self._sklearn_model = SKLModel(**self._hyperparams)
     if y is not None:
         self._sklearn_model.fit(X, y)
     else:
         self._sklearn_model.fit(X)
     return self
Example #4
def create_only_best_paper_pipelines(cfg, feature_preprocessing_key,
                                     N_channels, labels_dict):
    pipelines = dict()
    # Add Riemann pipeline
    n_xdawn_components = 5
    xdawn_class = 'Target'
    ts_metric = 'riemann'
    riemann_cov_type = 'lwf'
    xdawn_cov_type = 'scm'
    xdawn_classes_ = [labels_dict[xdawn_class]]
    new_key = f'ceat_rg_xdawncomps_{n_xdawn_components}_xdawnclasses_{xdawn_class}'
    pipelines[new_key] = SamplePropsPipeline([
        ('xdawn',
         pyriemann.estimation.XdawnCovariances(nfilter=n_xdawn_components,
                                               classes=xdawn_classes_,
                                               estimator=riemann_cov_type,
                                               xdawn_estimator=xdawn_cov_type,
                                               applyfilters=True)),
        ('TangentSpace',
         VariableReferenceTangentSpace(metric=ts_metric,
                                       tsupdate=False,
                                       tangent_space_reference='mean',
                                       random_seed=np.random.randint(2**32 - 1))),
        ('LogisticRegression', create_logistic_regression(penalty='l2'))
    ])
    # Add all three LDA versions
    fs = cfg['default']['data_preprocessing']['sampling_rate']
    cfg_vect = cfg['default'][feature_preprocessing_key][
        'feature_preprocessing']
    c_jm = cfg_vect['jumping_means_ival']
    c_sel = cfg_vect['select_ival']
    vectorizers = dict()
    key = 'numerous'
    vectorizers[f'jm_{key}'] = dict(
        vec=Vectorizer(jumping_mean_ivals=c_jm[key]['ival']),
        D=c_jm[key]['D'],
        fs=fs)
    classifiers = dict(
        lda_c_covs=LdaClasswiseCovs(solver='lsqr', shrinkage='auto'),
        lda_p_cov=LdaPooledCovs(N_channels=N_channels),
        lda_imp_p_cov=LdaImpPooledCov(N_channels=N_channels,
                                      standardize_featurestd=True,
                                      channel_gamma=0),
    )
    for v_key in vectorizers.keys():
        D = vectorizers[v_key]['D']
        vec = vectorizers[v_key]['vec']
        for c_key in classifiers.keys():
            clf = clone(classifiers[c_key])
            clf.N_times = D
            new_key = f'{v_key}_{c_key}'
            clf.preproc = vec
            pipelines[new_key] = make_pipeline(vec, clf)
        ncomp = 70
        pipelines[f'{v_key}_kPCA({ncomp})_skl_lsqr'] = make_pipeline(
            vec, KernelPCA(n_components=ncomp),
            LdaClasswiseCovs(solver='lsqr', shrinkage='auto'))
    return pipelines
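
The project-specific pieces above (SamplePropsPipeline, Vectorizer and the Lda* classifiers) are not part of scikit-learn. A self-contained sketch of the same kPCA -> shrinkage-LDA pattern with standard components standing in for them (an assumption, not the original code):

import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.decomposition import KernelPCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

rng = np.random.RandomState(0)
X, y = rng.rand(100, 40), rng.randint(0, 2, 100)
pipe = make_pipeline(KernelPCA(n_components=10),
                     LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto'))
pipe.fit(X, y)
print(pipe.score(X, y))  # training accuracy of the reduced-dimension LDA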
Example #5
class KernelPCAImpl():
    def __init__(self,
                 n_components=None,
                 kernel='linear',
                 gamma=None,
                 degree=3,
                 coef0=1,
                 kernel_params=None,
                 alpha=1.0,
                 fit_inverse_transform=False,
                 eigen_solver='auto',
                 tol=0,
                 max_iter=None,
                 remove_zero_eig=False,
                 random_state=None,
                 copy_X=True,
                 n_jobs=None):
        self._hyperparams = {
            'n_components': n_components,
            'kernel': kernel,
            'gamma': gamma,
            'degree': degree,
            'coef0': coef0,
            'kernel_params': kernel_params,
            'alpha': alpha,
            'fit_inverse_transform': fit_inverse_transform,
            'eigen_solver': eigen_solver,
            'tol': tol,
            'max_iter': max_iter,
            'remove_zero_eig': remove_zero_eig,
            'random_state': random_state,
            'copy_X': copy_X,
            'n_jobs': n_jobs
        }

    def fit(self, X, y=None):
        self._sklearn_model = SKLModel(**self._hyperparams)
        if y is not None:
            self._sklearn_model.fit(X, y)
        else:
            self._sklearn_model.fit(X)
        return self

    def transform(self, X):
        return self._sklearn_model.transform(X)
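
Example #5 shows the full wrapper class whose __init__ and fit appear in Examples #2 and #3. A minimal usage sketch, assuming SKLModel aliases sklearn.decomposition.KernelPCA as in the original module:

import numpy as np
from sklearn.decomposition import KernelPCA as SKLModel

X = np.random.RandomState(0).rand(20, 5)
impl = KernelPCAImpl(n_components=2, kernel='rbf', gamma=0.5)
print(impl.fit(X).transform(X).shape)  # (20, 2)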
Example #6
def create_lda_pipelines(cfg, feature_preprocessing_key, N_channels):
    pipelines = dict()
    fs = cfg['default']['data_preprocessing']['sampling_rate']
    cfg_vect = cfg['default'][feature_preprocessing_key][
        'feature_preprocessing']
    c_jm = cfg_vect['jumping_means_ival']
    c_sel = cfg_vect['select_ival']
    vectorizers = dict()
    for key in c_jm:
        vectorizers[f'jm_{key}'] = dict(
            vec=Vectorizer(jumping_mean_ivals=c_jm[key]['ival']),
            D=c_jm[key]['D'],
            fs=fs)
    for key in c_sel:
        vectorizers[f'sel_{key}'] = dict(
            vec=Vectorizer(select_ival=c_sel[key]['ival']),
            D=c_sel[key]['D'],
            fs=fs)
    classifiers = dict(
        lda_c_covs=LdaClasswiseCovs(solver='lsqr', shrinkage='auto'),
        lda_p_cov=LdaPooledCovs(N_channels=N_channels),
        lda_imp_p_cov=LdaImpPooledCov(N_channels=N_channels,
                                      standardize_featurestd=True,
                                      channel_gamma=0),
    )
    for v_key in vectorizers.keys():
        D = vectorizers[v_key]['D']
        vec = vectorizers[v_key]['vec']
        for c_key in classifiers.keys():
            clf = clone(classifiers[c_key])
            clf.N_times = D
            new_key = f'{v_key}_{c_key}'
            clf.preproc = vec  # why does this not persist :(
            pipelines[new_key] = make_pipeline(vec, clf)
        for i in range(1, 11):
            # sweep n_components = 10, 20, ..., 90, then None (keep all components)
            ncomp = i * 10 if i < 10 else None
            pipelines[f'{v_key}_kPCA({ncomp})_skl_lsqr'] = make_pipeline(
                vec, KernelPCA(n_components=ncomp),
                LdaClasswiseCovs(solver='lsqr', shrinkage='auto'))
    return pipelines
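
The "why does this not persist" comment is explained by sklearn.base.clone: clone() rebuilds an estimator from its constructor parameters (get_params()) only, so ad-hoc attributes such as preproc and N_times are silently dropped whenever the estimator is cloned again, e.g. inside cross-validation. A minimal demonstration:

from sklearn.base import clone
from sklearn.linear_model import LogisticRegression

clf = LogisticRegression()
clf.preproc = 'my_vectorizer'    # ad-hoc attribute, not a constructor parameter
print(hasattr(clone(clf), 'preproc'))  # False: clone() copies get_params() only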
Example #7
# Imports the snippet relies on (reconstructed); `kpcabound` is a
# project-local helper assumed to be in scope.
import math
import random

import numpy as np
from pylab import array, axis, cm, figure, imshow, show, zeros
from sklearn.decomposition import KernelPCA


def Negative_Generate_Points_ND_PCA_Cal(vol_data, pointarrays, pos_pnum,
                                        target_volume, l_radius,
                                        outofbound_thr):
    z_size, y_size, x_size = vol_data.shape
    
    z_max = z_size - 1
    y_max = y_size - 1
    x_max = x_size - 1
    
    planeimgsize = (l_radius*2+1)**2
    planenum = 3
    
    neg_pca_input_mtx = []
    
    planeselect = 0 # default: xy, xz = 1, yz = 2
    f3 = figure(frameon=False)
    fig_col = math.ceil(pos_pnum/10)
    
    if planeselect == 0:
        f3.suptitle('X-Y planes at the negative points in unit volumes', fontsize=16)
    elif planeselect == 1:
        f3.suptitle('X-Z planes at the negative points in unit volumes', fontsize=16)
    else:
        f3.suptitle('Y-Z planes at the negative points in unit volumes', fontsize=16)

    ## Generate the list of negative points using target_volume
    # NB: uint8 coordinates assume volume dimensions of at most 256
    neg_pointarrays = np.ndarray(shape=(pos_pnum, 3), dtype='uint8')
    neg_values = np.ndarray(shape=(pos_pnum,), dtype='d')
    neg_pnum = 0
    while neg_pnum < pos_pnum:
        cand_z = random.randint(0, z_max)
        cand_y = random.randint(0, y_max)
        cand_x = random.randint(0, x_max)

        nx_min = cand_x - l_radius
        ny_min = cand_y - l_radius
        nz_min = cand_z - l_radius
        
        nx_max = cand_x + l_radius
        ny_max = cand_y + l_radius
        nz_max = cand_z + l_radius
        
        if (nx_min >= 0) and (ny_min >= 0) and (nz_min >= 0) and (nx_max <= x_max) and (ny_max <= y_max) and (nz_max <= z_max):
            if target_volume[cand_z, cand_y, cand_x] < outofbound_thr:
                f3.add_subplot(fig_col+1, 10, neg_pnum+1)  # this line outputs images on top of each other
                
                neg_values[neg_pnum] = round(target_volume[cand_z,cand_y,cand_x], 3)
                
                neg_pointarrays[neg_pnum,0] = cand_x 
                neg_pointarrays[neg_pnum,1] = cand_y
                neg_pointarrays[neg_pnum,2] = cand_z
                
                #X-Y Planes data
                xyimg = vol_data[cand_z,ny_min:ny_max+1,nx_min:nx_max+1]
    
                #X-Z Planes data
                xzimg = vol_data[nz_min:nz_max+1,cand_y,nx_min:nx_max+1]
                
                #Y-Z Planes data
                yzimg = vol_data[nz_min:nz_max+1,ny_min:ny_max+1,cand_x]
        
                if planeselect == 0:
                    imshow(xyimg,cmap=cm.Greys_r)
                elif planeselect == 1:
                    imshow(xzimg,cmap=cm.Greys_r)
                else:
                    imshow(yzimg,cmap=cm.Greys_r)
                axis('off')
        
                xyimg = xyimg.flatten()
                xzimg = xzimg.flatten()
                yzimg = yzimg.flatten()
    
                xyzimg = zeros((planeimgsize, planenum))
                xyzimg[:,0] = xyimg
                xyzimg[:,1] = xzimg
                xyzimg[:,2] = yzimg
                
                neg_pca_input_mtx.append(xyzimg.flatten())
        
                neg_pnum += 1
                
                del xyimg, xzimg, yzimg, xyzimg

    neg_pca_input_arr = array(neg_pca_input_mtx, 'd')
    del neg_pca_input_mtx

    # Linear PCA
#    neg_coeff,neg_meanvector = princomp(neg_pca_input_arr,2)
#    neg_databack = np.zeros((neg_pca_input_arr.shape))
#    for n_index in range(neg_pca_input_arr.shape[0]):
#        l_pr = np.dot(neg_pca_input_arr[n_index,:]-neg_meanvector,neg_coeff)
#        neg_databack[n_index,:] = np.dot(l_pr,neg_coeff.T) + neg_meanvector

    # number of eigenvalues: three quarters of the sample count
    # (cast to int, since np.round returns a float)
    numev = int(np.round((neg_pca_input_arr.shape[0] / 4) * 3))
    # numev = neg_pca_input_arr.shape[0]-1
    err = kpcabound(neg_pca_input_arr, 1.5, numev)
    # print(err)
    
    p_index = np.arange(1, neg_pca_input_arr.shape[0] + 1, dtype='d')  # 1-based sample index
    
    f7 = figure(frameon=False)
    ax1 = f7.add_subplot(2,1,1)
    # ax1.plot(err, 'b-')
    line, = ax1.semilogy(err, color='blue', lw=2)
    show()
    
    ###########################
    ## KernelPCA
    ###########################
    kpca = KernelPCA(kernel="rbf", gamma=0.5, fit_inverse_transform=True, eigen_solver='auto')
    X_kpca = kpca.fit_transform(neg_pca_input_arr)
    neg_databack = kpca.inverse_transform(X_kpca)

    neg_databack = neg_databack.round()
    
    f4 = figure(frameon=False)

    if planeselect == 0:
        f4.suptitle('X-Y planes with neg-PCA dimensionality reduction', fontsize=16)
    elif planeselect == 1:
        f4.suptitle('X-Z planes with neg-PCA dimensionality reduction', fontsize=16)
    else:
        f4.suptitle('Y-Z planes with neg-PCA dimensionality reduction', fontsize=16)
    fig_col = math.ceil(neg_databack.shape[0]/10)

    ccc = 1
    for i in range(neg_databack.shape[0]):
        f4.add_subplot(fig_col+1, 10, ccc)  # this line outputs images on top of each other
        xyzimg = np.uint8(neg_databack[i,:].reshape(planeimgsize, planenum))
        xyimg = xyzimg[:,0].reshape(l_radius*2+1,l_radius*2+1)
        xzimg = xyzimg[:,1].reshape(l_radius*2+1,l_radius*2+1)
        yzimg = xyzimg[:,2].reshape(l_radius*2+1,l_radius*2+1)
        if planeselect == 0:
            imshow(xyimg,cmap=cm.Greys_r)
        elif planeselect == 1:
            imshow(xzimg,cmap=cm.Greys_r)
        else:
            imshow(yzimg,cmap=cm.Greys_r)
        axis('off')
        ccc += 1
    
    show()
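
The reconstruction step above works only because fit_inverse_transform=True was passed: KernelPCA then learns an approximate pre-image map, which enables inverse_transform back to the original feature space. A minimal sketch of just that step:

import numpy as np
from sklearn.decomposition import KernelPCA

X = np.random.RandomState(0).rand(50, 27)
kpca = KernelPCA(kernel='rbf', gamma=0.5, fit_inverse_transform=True)
X_back = kpca.inverse_transform(kpca.fit_transform(X))
print(X_back.shape)  # (50, 27): reconstruction in the original space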
Example #8
			# excerpt from a name -> estimator registry; entries such as GraphLasso,
			# Imputer and LSHForest come from older scikit-learn releases
			'GenericUnivariateSelect':GenericUnivariateSelect(),
			'GradientBoostingClassifier':GradientBoostingClassifier(),
			'GradientBoostingRegressor':GradientBoostingRegressor(),
			'GraphLasso':GraphLasso(),
			'GraphLassoCV':GraphLassoCV(),
			'HuberRegressor':HuberRegressor(),
			'Imputer':Imputer(),
			'IncrementalPCA':IncrementalPCA(),
			'IsolationForest':IsolationForest(),
			'Isomap':Isomap(),
			'KMeans':KMeans(),
			'KNeighborsClassifier':KNeighborsClassifier(),
			'KNeighborsRegressor':KNeighborsRegressor(),
			'KernelCenterer':KernelCenterer(),
			'KernelDensity':KernelDensity(),
			'KernelPCA':KernelPCA(),
			'KernelRidge':KernelRidge(),
			'LSHForest':LSHForest(),
			'LabelPropagation':LabelPropagation(),
			'LabelSpreading':LabelSpreading(),
			'Lars':Lars(),
			'LarsCV':LarsCV(),
			'Lasso':Lasso(),
			'LassoCV':LassoCV(),
			'LassoLars':LassoLars(),
			'LassoLarsCV':LassoLarsCV(),
			'LassoLarsIC':LassoLarsIC(),
			'LatentDirichletAllocation':LatentDirichletAllocation(),
			'LedoitWolf':LedoitWolf(),
			'LinearDiscriminantAnalysis':LinearDiscriminantAnalysis(),
			'LinearRegression':LinearRegression(),
Example #9
 def add_kernel_pca(self):
     # append a default (linear-kernel) KernelPCA step to the pipeline list
     self.pipeline.append(KernelPCA())
Example #10
# SCALING
minMaxScaler = MinMaxScaler(feature_range=(0.0, 1.0))
#normalizer = skprep.Normalizer()
columnDeleter = fs.FeatureDeleter()

# FEATURE SELECTION
varianceThresholdSelector = VarianceThreshold(threshold=(0))
percentileSelector = SelectPercentile(score_func=f_classif, percentile=20)
kBestSelector = SelectKBest(f_classif, k=1000)  # k is keyword-only in current scikit-learn

# FEATURE EXTRACTION
#rbmPipe = skpipe.Pipeline(steps=[('scaling', minMaxScaler), ('rbm', rbm)])
nmf = NMF(n_components=150)
pca = PCA(n_components=80)
sparse_pca = SparsePCA(n_components=700, max_iter=3, verbose=2)
kernel_pca = KernelPCA(n_components=150)  # Costs huge amounts of ram
randomized_pca = RandomizedPCA(n_components=500)  # removed in scikit-learn 0.20; use PCA(svd_solver='randomized')

# REGRESSORS
random_forest_regressor = RandomForestRegressor(n_estimators=256)
gradient_boosting_regressor = GradientBoostingRegressor(n_estimators=60)
support_vector_regressor = svm.SVR()

# CLASSIFIERS
support_vector_classifier = svm.SVC(probability=True, verbose=True)
linear_support_vector_classifier = svm.LinearSVC(dual=False)
nearest_neighbor_classifier = KNeighborsClassifier()
extra_trees_classifier = ExtraTreesClassifier(n_estimators=256)
bagging_classifier = BaggingClassifier(
    base_estimator=GradientBoostingClassifier(n_estimators=200,
                                              max_features=4))
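
On the "Costs huge amounts of ram" comment above: KernelPCA materializes and decomposes an (n_samples x n_samples) kernel matrix regardless of n_components, so memory grows quadratically with the number of samples. A quick back-of-the-envelope check:

n_samples = 100_000
print(n_samples ** 2 * 8 / 1e9, "GB")  # ~80 GB for a float64 kernel matrix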