def train_data_format(df):
    """Turn a training frame into a feature matrix and a label vector.

    The ``type`` column supplies the labels.  ``id`` and ``type`` are removed
    from the features, and three RBF kernel-PCA components (fitted on the
    features minus a few excluded columns) are appended to the right of the
    raw feature values.

    Args:
        df: Frame containing at least ``id``, ``type`` and the columns named
            in the exclusion list below.

    Returns:
        Tuple ``(x, y)`` with the stacked feature matrix and the label array.
    """
    labels = df['type'].values
    features = df.drop(['id', 'type'], axis=1)

    # Columns withheld from the kernel-PCA input only; they remain in ``x``.
    kpca_excluded = [
        'bl_lt_0.2',
        'bl_mt_0.7',
        'hl_mt_0.7',
    ]
    # Further candidates, currently not excluded:
    # 'hl_lt_0.2', 'rf_lt_0.2', 'rf_mt_0.8', 'hs_mt_0.7', 'hs_lt_0.2'

    # NOTE(review): scikit-learn documents ``degree`` as used by the 'poly'
    # kernel only, so it presumably has no effect with kernel='rbf'.
    reducer = KernelPCA(n_components=3, kernel='rbf', degree=2, gamma=0.1)
    kpca_input = features.drop(kpca_excluded, axis=1).values
    components = reducer.fit_transform(kpca_input)

    x = np.hstack((features.values, components))
    print(features.dtypes)
    return x, labels
def __init__(self, n_components=None, kernel='linear', gamma=None, degree=3,
             coef0=1, kernel_params=None, alpha=1.0,
             fit_inverse_transform=False, eigen_solver='auto', tol=0,
             max_iter=None, remove_zero_eig=False, random_state=None,
             copy_X=True, n_jobs=None):
    """Record the hyperparameters and build the wrapped model from them.

    Every argument is stored under its own name in ``self._hyperparams``;
    that mapping is then expanded as keyword arguments into ``SKLModel`` and
    the result is kept in ``self._wrapped_model``.
    """
    self._hyperparams = dict(
        n_components=n_components,
        kernel=kernel,
        gamma=gamma,
        degree=degree,
        coef0=coef0,
        kernel_params=kernel_params,
        alpha=alpha,
        fit_inverse_transform=fit_inverse_transform,
        eigen_solver=eigen_solver,
        tol=tol,
        max_iter=max_iter,
        remove_zero_eig=remove_zero_eig,
        random_state=random_state,
        copy_X=copy_X,
        n_jobs=n_jobs,
    )
    self._wrapped_model = SKLModel(**self._hyperparams)
def fit(self, X, y=None):
    """Create a fresh ``SKLModel`` from the stored hyperparameters and fit it.

    Args:
        X: Training data, handed to the model's ``fit`` unchanged.
        y: Optional targets; forwarded only when not ``None``.

    Returns:
        ``self``, so calls can be chained.
    """
    self._sklearn_model = SKLModel(**self._hyperparams)
    fit_args = (X,) if y is None else (X, y)
    self._sklearn_model.fit(*fit_args)
    return self
def create_only_best_paper_pipelines(cfg, feature_preprocessing_key, N_channels, labels_dict):
    """Build the reduced pipeline set: one Riemannian pipeline plus LDA variants.

    Args:
        cfg: Nested configuration; read under ``cfg['default']``.
        feature_preprocessing_key: Selects the feature-preprocessing section
            inside ``cfg['default']``.
        N_channels: Forwarded to the pooled-covariance LDA classifiers.
        labels_dict: Maps class names to labels; ``'Target'`` is looked up.

    Returns:
        Dict mapping a pipeline name to the constructed pipeline object.
    """
    pipelines = {}

    # --- Riemannian pipeline -------------------------------------------
    n_xdawn_components = 5
    xdawn_class = 'Target'
    riemann_name = f'ceat_rg_xdawncomps_{n_xdawn_components}_xdawnclasses_{xdawn_class}'
    xdawn_step = pyriemann.estimation.XdawnCovariances(
        nfilter=n_xdawn_components,
        classes=[labels_dict[xdawn_class]],
        estimator='lwf',
        xdawn_estimator='scm',
        applyfilters=True)
    tangent_step = VariableReferenceTangentSpace(
        metric='riemann',
        tsupdate=False,
        tangent_space_reference='mean',
        random_seed=np.random.randint(2**32 - 1))
    pipelines[riemann_name] = SamplePropsPipeline([
        ('xdawn', xdawn_step),
        ('TangentSpace', tangent_step),
        ('LogisticRegression', create_logistic_regression(penalty='l2')),
    ])

    # --- LDA pipelines ---------------------------------------------------
    defaults = cfg['default']
    fs = defaults['data_preprocessing']['sampling_rate']
    cfg_vect = defaults[feature_preprocessing_key]['feature_preprocessing']
    c_jm = cfg_vect['jumping_means_ival']
    # Not used below; the lookup is kept so a missing key still fails here.
    c_sel = cfg_vect['select_ival']

    key = 'numerous'
    vectorizers = {
        f'jm_{key}': dict(
            vec=Vectorizer(jumping_mean_ivals=c_jm[key]['ival']),
            D=c_jm[key]['D'],
            fs=fs),
    }
    classifiers = {
        'lda_c_covs': LdaClasswiseCovs(solver='lsqr', shrinkage='auto'),
        'lda_p_cov': LdaPooledCovs(N_channels=N_channels),
        'lda_imp_p_cov': LdaImpPooledCov(N_channels=N_channels,
                                         standardize_featurestd=True,
                                         channel_gamma=0),
    }

    for v_key, v_entry in vectorizers.items():
        D = v_entry['D']
        vec = v_entry['vec']
        for c_key, prototype in classifiers.items():
            clf = clone(prototype)
            clf.N_times = D
            clf.preproc = vec
            pipelines[f'{v_key}_{c_key}'] = make_pipeline(vec, clf)

        # Kernel-PCA variant for this vectorizer (there is exactly one here).
        ncomp = 70
        pipelines[f'{v_key}_kPCA({ncomp})_skl_lsqr'] = make_pipeline(
            vec,
            KernelPCA(n_components=ncomp),
            LdaClasswiseCovs(solver='lsqr', shrinkage='auto'))

    return pipelines
class KernelPCAImpl:
    """Wrapper that stores hyperparameters and builds ``SKLModel`` on ``fit``.

    The constructor only records its arguments; the wrapped model is created
    each time ``fit`` is called and is then used by ``transform``.
    """

    def __init__(self, n_components=None, kernel='linear', gamma=None,
                 degree=3, coef0=1, kernel_params=None, alpha=1.0,
                 fit_inverse_transform=False, eigen_solver='auto', tol=0,
                 max_iter=None, remove_zero_eig=False, random_state=None,
                 copy_X=True, n_jobs=None):
        """Store every argument under its own name in ``self._hyperparams``."""
        self._hyperparams = dict(
            n_components=n_components,
            kernel=kernel,
            gamma=gamma,
            degree=degree,
            coef0=coef0,
            kernel_params=kernel_params,
            alpha=alpha,
            fit_inverse_transform=fit_inverse_transform,
            eigen_solver=eigen_solver,
            tol=tol,
            max_iter=max_iter,
            remove_zero_eig=remove_zero_eig,
            random_state=random_state,
            copy_X=copy_X,
            n_jobs=n_jobs,
        )

    def fit(self, X, y=None):
        """Build a new ``SKLModel`` from the hyperparameters and fit it.

        ``y`` is forwarded only when it is not ``None``.  Returns ``self``.
        """
        model = SKLModel(**self._hyperparams)
        self._sklearn_model = model
        if y is None:
            model.fit(X)
        else:
            model.fit(X, y)
        return self

    def transform(self, X):
        """Delegate to the fitted model's ``transform``; requires ``fit`` first."""
        return self._sklearn_model.transform(X)
def create_lda_pipelines(cfg, feature_preprocessing_key, N_channels):
    """Build every vectorizer x LDA-classifier pipeline plus kernel-PCA variants.

    Args:
        cfg: Nested configuration; read under ``cfg['default']``.
        feature_preprocessing_key: Selects the feature-preprocessing section
            inside ``cfg['default']``.
        N_channels: Forwarded to the pooled-covariance LDA classifiers.

    Returns:
        Dict mapping a pipeline name to the constructed pipeline.  For each
        vectorizer ``v`` and classifier ``c`` there is a ``'{v}_{c}'`` entry,
        followed by ``'{v}_kPCA({n})_skl_lsqr'`` entries for
        ``n = 10, 20, ..., 90`` and ``None``.
    """
    pipelines = dict()

    defaults = cfg['default']
    fs = defaults['data_preprocessing']['sampling_rate']
    cfg_vect = defaults[feature_preprocessing_key]['feature_preprocessing']
    c_jm = cfg_vect['jumping_means_ival']
    c_sel = cfg_vect['select_ival']

    # One vectorizer per configured jumping-means / select interval set.
    vectorizers = dict()
    for key in c_jm:
        vectorizers[f'jm_{key}'] = dict(
            vec=Vectorizer(jumping_mean_ivals=c_jm[key]['ival']),
            D=c_jm[key]['D'],
            fs=fs)
    for key in c_sel:
        vectorizers[f'sel_{key}'] = dict(
            vec=Vectorizer(select_ival=c_sel[key]['ival']),
            D=c_sel[key]['D'],
            fs=fs)

    classifiers = dict(
        lda_c_covs=LdaClasswiseCovs(solver='lsqr', shrinkage='auto'),
        lda_p_cov=LdaPooledCovs(N_channels=N_channels),
        lda_imp_p_cov=LdaImpPooledCov(N_channels=N_channels,
                                      standardize_featurestd=True,
                                      channel_gamma=0),
    )

    # Pass 1: plain LDA pipelines.  The original ran this loop twice and
    # overwrote each entry with an equivalent pipeline; once is enough.
    for v_key, v_entry in vectorizers.items():
        D = v_entry['D']
        vec = v_entry['vec']
        for c_key, prototype in classifiers.items():
            clf = clone(prototype)
            # NOTE(review): these are plain attributes set after cloning.
            # sklearn's clone() rebuilds an estimator from get_params(), so
            # unless N_times/preproc are constructor parameters they are
            # presumably dropped whenever the pipeline is cloned again (e.g.
            # inside cross-validation) -- the likely reason they "do not
            # persist".  Confirm against the classifier definitions.
            clf.N_times = D
            clf.preproc = vec
            pipelines[f'{v_key}_{c_key}'] = make_pipeline(vec, clf)

    # Pass 2: kernel-PCA variants.  Kept as a separate pass so these keys
    # still come after all plain LDA keys, matching the previous dict order.
    for v_key, v_entry in vectorizers.items():
        vec = v_entry['vec']
        for i in range(1, 11):
            # 10, 20, ..., 90 components; the last variant passes None.
            ncomp = i * 10 if i < 10 else None
            pipelines[f'{v_key}_kPCA({ncomp})_skl_lsqr'] = make_pipeline(
                vec,
                KernelPCA(n_components=ncomp),
                LdaClasswiseCovs(solver='lsqr', shrinkage='auto'))

    return pipelines
def Negative_Generate_Points_ND_PCA_Cal(vol_data,pointarrays,pos_pnum,target_volume,l_radius,outofbound_thr):
    """Sample negative points in a volume, plot them, and run kernel PCA on them.

    Random voxel positions are drawn until ``pos_pnum`` have been accepted.
    A position is accepted when a cube of half-width ``l_radius`` around it
    lies inside ``vol_data`` and ``target_volume`` at that position is below
    ``outofbound_thr``.  For every accepted point the three orthogonal
    planes through it are cut out of ``vol_data``, one of them is drawn, and
    all three are packed into one row of the kernel-PCA input matrix.

    The function then plots the result of ``kpcabound`` on a log axis, fits
    an RBF ``KernelPCA`` with inverse transform, and plots the rounded
    reconstructions.  Nothing is returned; the output is the three figures.

    Args:
        vol_data: 3-D array unpacked as ``(z, y, x)``.
        pointarrays: Not used in this function.
        pos_pnum: Number of negative points to collect.
        target_volume: Array indexed as ``[z, y, x]``, compared to the threshold.
        l_radius: Half-width of each plane patch; patches are
            ``2 * l_radius + 1`` on a side.
        outofbound_thr: Upper bound (exclusive) on ``target_volume`` for a
            candidate to count as negative.
    """
    z_size, y_size, x_size = vol_data.shape
    z_max = z_size - 1
    y_max = y_size - 1
    x_max = x_size - 1
    planeimgsize = (l_radius*2+1)**2
    planenum = 3
    neg_pca_input_mtx = []
    planeselect = 0 # default: xy, xz = 1, yz = 2
    f3 = figure(frameon=False)
    # Subplot grid is (fig_col + 1) rows by 10 columns.
    fig_col = math.ceil(pos_pnum/10)
    if planeselect == 0:
        f3.suptitle('X-Y planes at the negative points in unit volumes', fontsize=16)
    elif planeselect == 1:
        f3.suptitle('X-Z planes at the negative points in unit volumes', fontsize=16)
    else:
        f3.suptitle('Y-Z planes at the negative points in unit volumes', fontsize=16)

    ## Generate the list of negative points using target_volume
    # NOTE(review): neg_pointarrays and neg_values are filled below but never
    # read or returned.  uint8 also cannot represent coordinates above 255 --
    # confirm whether larger volumes are expected.
    neg_pointarrays = np.ndarray(shape=(pos_pnum,3), dtype='uint8')
    neg_values = np.ndarray(shape=(pos_pnum,), dtype='d')
    neg_pnum = 0
    # Rejection sampling: rejected candidates do not advance neg_pnum.
    # NOTE(review): if no voxel satisfies both tests this loop never ends.
    while (neg_pnum < pos_pnum):
        cand_z = random.randint(0, z_max)
        cand_y = random.randint(0, y_max)
        cand_x = random.randint(0, x_max)
        nx_min = cand_x - l_radius
        ny_min = cand_y - l_radius
        nz_min = cand_z - l_radius
        nx_max = cand_x + l_radius
        ny_max = cand_y + l_radius
        nz_max = cand_z + l_radius
        # The whole cube around the candidate must lie inside the volume.
        if (nx_min >= 0) and (ny_min >= 0) and (nz_min >= 0) and (nx_max <= x_max) and (ny_max <= y_max) and (nz_max <= z_max):
            if(target_volume[cand_z,cand_y,cand_x] < outofbound_thr):
                f3.add_subplot(fig_col+1, 10, neg_pnum+1) # this line outputs images on top of each other
                neg_values[neg_pnum] = round(target_volume[cand_z,cand_y,cand_x], 3)
                neg_pointarrays[neg_pnum,0] = cand_x
                neg_pointarrays[neg_pnum,1] = cand_y
                neg_pointarrays[neg_pnum,2] = cand_z
                #X-Y Planes data
                xyimg = vol_data[cand_z,ny_min:ny_max+1,nx_min:nx_max+1]
                #X-Z Planes data
                xzimg = vol_data[nz_min:nz_max+1,cand_y,nx_min:nx_max+1]
                #Y-Z Planes data
                yzimg = vol_data[nz_min:nz_max+1,ny_min:ny_max+1,cand_x]
                # Draws into the subplot created above (pyplot current axes).
                if planeselect == 0:
                    imshow(xyimg,cmap=cm.Greys_r)
                elif planeselect == 1:
                    imshow(xzimg,cmap=cm.Greys_r)
                else:
                    imshow(yzimg,cmap=cm.Greys_r)
                axis('off')
                xyimg = xyimg.flatten()
                xzimg = xzimg.flatten()
                yzimg = yzimg.flatten()
                # One column per plane; flattening the (pixels, 3) array gives
                # one sample row of length planeimgsize * planenum.
                xyzimg = zeros((planeimgsize, planenum))
                xyzimg[:,0] = xyimg
                xyzimg[:,1] = xzimg
                xyzimg[:,2] = yzimg
                neg_pca_input_mtx.append(xyzimg.flatten())
                neg_pnum += 1
                del xyimg, xzimg, yzimg, xyzimg
    # One row per accepted point.
    neg_pca_input_arr = array(neg_pca_input_mtx, 'd')
    del neg_pca_input_mtx
    # Linear PCA
    # neg_coeff,neg_meanvector = princomp(neg_pca_input_arr,2)
    # neg_databack = np.zeros((neg_pca_input_arr.shape))
    # for n_index in range(neg_pca_input_arr.shape[0]):
    # l_pr = np.dot(neg_pca_input_arr[n_index,:]-neg_meanvector,neg_coeff)
    # neg_databack[n_index,:] = np.dot(l_pr,neg_coeff.T) + neg_meanvector
    # Three quarters of the sample count, rounded.
    # NOTE(review): kpcabound is defined elsewhere; the meaning of its
    # arguments (1.5, numev) is not visible here -- confirm.
    numev = np.round((neg_pca_input_arr.shape[0]/4)*3)
    # numev = neg_pca_input_arr.shape[0]-1
    err = kpcabound(neg_pca_input_arr,1.5,numev)
    #print err
    # NOTE(review): p_index (1..N) is built but not used afterwards.
    p_index = np.zeros((neg_pca_input_arr.shape[0],))
    for ik in range (neg_pca_input_arr.shape[0]):
        p_index[ik] = ik+1
    f7 = figure(frameon=False)
    ax1 = f7.add_subplot(2,1,1)
    # ax1.plot(err, 'b-')
    line, = ax1.semilogy(err, color='blue', lw=2)
    show()
    ###########################
    ## KernelPCA
    ###########################
    # Project with an RBF kernel, then map back to input space.
    kpca = KernelPCA(kernel="rbf", gamma=0.5, fit_inverse_transform=True, eigen_solver='auto')
    X_kpca = kpca.fit_transform(neg_pca_input_arr)
    neg_databack = kpca.inverse_transform(X_kpca)
    neg_databack = neg_databack.round()
    f4 = figure(frameon=False)
    if planeselect == 0:
        f4.suptitle('X-Y planes with neg-PCA dimensionality reduction', fontsize=16)
    elif planeselect == 1:
        f4.suptitle('X-Z planes with neg-PCA dimensionality reduction', fontsize=16)
    else:
        f4.suptitle('Y-Z planes with neg-PCA dimensionality reduction', fontsize=16)
    fig_col = math.ceil(neg_databack.shape[0]/10)
    ccc = 1
    for i in range(neg_databack.shape[0]):
        f4.add_subplot(fig_col+1, 10, ccc) # this line outputs images on top of each other
        # Undo the packing used when the input rows were built: back to
        # (pixels, 3), then each column back to a square patch.
        xyzimg = np.uint8(neg_databack[i,:].reshape(planeimgsize, planenum))
        xyimg = xyzimg[:,0].reshape(l_radius*2+1,l_radius*2+1)
        xzimg = xyzimg[:,1].reshape(l_radius*2+1,l_radius*2+1)
        yzimg = xyzimg[:,2].reshape(l_radius*2+1,l_radius*2+1)
        if planeselect == 0:
            imshow(xyimg,cmap=cm.Greys_r)
        elif planeselect == 1:
            imshow(xzimg,cmap=cm.Greys_r)
        else:
            imshow(yzimg,cmap=cm.Greys_r)
        axis('off')
        ccc += 1
    show()
'GenericUnivariateSelect':GenericUnivariateSelect(), 'GradientBoostingClassifier':GradientBoostingClassifier(), 'GradientBoostingRegressor':GradientBoostingRegressor(), 'GraphLasso':GraphLasso(), 'GraphLassoCV':GraphLassoCV(), 'HuberRegressor':HuberRegressor(), 'Imputer':Imputer(), 'IncrementalPCA':IncrementalPCA(), 'IsolationForest':IsolationForest(), 'Isomap':Isomap(), 'KMeans':KMeans(), 'KNeighborsClassifier':KNeighborsClassifier(), 'KNeighborsRegressor':KNeighborsRegressor(), 'KernelCenterer':KernelCenterer(), 'KernelDensity':KernelDensity(), 'KernelPCA':KernelPCA(), 'KernelRidge':KernelRidge(), 'LSHForest':LSHForest(), 'LabelPropagation':LabelPropagation(), 'LabelSpreading':LabelSpreading(), 'Lars':Lars(), 'LarsCV':LarsCV(), 'Lasso':Lasso(), 'LassoCV':LassoCV(), 'LassoLars':LassoLars(), 'LassoLarsCV':LassoLarsCV(), 'LassoLarsIC':LassoLarsIC(), 'LatentDirichletAllocation':LatentDirichletAllocation(), 'LedoitWolf':LedoitWolf(), 'LinearDiscriminantAnalysis':LinearDiscriminantAnalysis(), 'LinearRegression':LinearRegression(),
def add_kernel_pca(self):
    """Append a default-constructed ``KernelPCA`` to ``self.pipeline``."""
    kernel_pca_step = KernelPCA()
    self.pipeline.append(kernel_pca_step)
# SCALING minMaxScaler = MinMaxScaler(feature_range=(0.0, 1.0)) #normalizer = skprep.Normalizer() columnDeleter = fs.FeatureDeleter() # FEATURE SELECTION varianceThresholdSelector = VarianceThreshold(threshold=(0)) percentileSelector = SelectPercentile(score_func=f_classif, percentile=20) kBestSelector = SelectKBest(f_classif, 1000) # FEATURE EXTRACTION #rbmPipe = skpipe.Pipeline(steps=[('scaling', minMaxScaler), ('rbm', rbm)]) nmf = NMF(n_components=150) pca = PCA(n_components=80) sparse_pca = SparsePCA(n_components=700, max_iter=3, verbose=2) kernel_pca = KernelPCA(n_components=150) # Costs huge amounts of ram randomized_pca = RandomizedPCA(n_components=500) # REGRESSORS random_forest_regressor = RandomForestRegressor(n_estimators=256) gradient_boosting_regressor = GradientBoostingRegressor(n_estimators=60) support_vector_regressor = svm.SVR() # CLASSIFIERS support_vector_classifier = svm.SVC(probability=True, verbose=True) linear_support_vector_classifier = svm.LinearSVC(dual=False) nearest_neighbor_classifier = KNeighborsClassifier() extra_trees_classifier = ExtraTreesClassifier(n_estimators=256) bagging_classifier = BaggingClassifier( base_estimator=GradientBoostingClassifier(n_estimators=200, max_features=4),