def _get_meta_features(self, gene_sets, filter_down):
    """
    Derive gene-set level meta-features and store them on the instance.

    The per-gene-set table returned by ``extract_geneset_pcs`` is optionally
    screened against ``self.global_vars.background``, standardized row by
    row, and decomposed with ``frame_svd``.  Results are written to
    ``self.loadings``, ``self.pct_var``, ``self.pathways``,
    ``self.features['pathways']`` and the ``pathway_pc1`` / ``pathway_pc2``
    entries of ``self.global_vars`` and ``self.global_loadings``.

    :param gene_sets: forwarded unchanged to ``extract_geneset_pcs``.
    :param filter_down: forwarded unchanged to ``extract_geneset_pcs``.
    :returns: None; the method only mutates ``self``.
    """
    loadings, pct_var, pathways = extract_geneset_pcs(self.df, gene_sets,
                                                      filter_down)
    self.loadings = loadings
    self.pct_var = pct_var

    # NOTE(review): the original indentation was lost; the conditional is
    # assumed to guard only the screening/filtering step -- confirm.
    if hasattr(self.global_vars, 'background'):
        background_screen = screen_feature(self.global_vars.background,
                                           pearson_pandas, pathways)
        # Keep only rows whose p-value exceeds the cutoff (10e-5 == 1e-4);
        # presumably this drops gene sets tracking the background signal.
        keep = background_screen.p > 10e-5
        pathways = pathways.ix[keep]

    # Standardize each row: subtract the row mean, divide by the row std.
    row_means = pathways.mean(1)
    row_stds = pathways.std(1)
    pathways = ((pathways.T - row_means) / row_stds).T

    # The middle element of the decomposition is not used here.
    U, _, pc = frame_svd(pathways)

    self.pathways = pathways
    self.features['pathways'] = pathways

    top_two = (('pathway_pc1', 0), ('pathway_pc2', 1))
    for key, position in top_two:
        self.global_vars[key] = pc[position]
    for key, position in top_two:
        self.global_loadings[key] = U[position]
def _get_real_features(self):
    """
    Extract binary and real-valued features from ``self.df`` and store them.

    Steps, in order:

    1. ``extract_features`` splits ``self.df`` into three tables
       (``binary``, ``singles``, ``real``).
    2. Rows of ``real`` whose labels are not in ``singles`` (with NA rows
       dropped) are passed to ``extract_pc`` to obtain a "background"
       result exposing ``'pat_vec'`` and ``'gene_vec'`` entries.
    3. ``singles`` is screened against the background ``'pat_vec'`` and
       only rows with p-value above ``10e-5`` are kept.
    4. The surviving rows are standardized row by row and decomposed with
       ``frame_svd``.

    Results are written to ``self.features`` (``'binary'``, ``'real'``),
    ``self.global_vars`` and ``self.global_loadings`` (``'background'``,
    ``'filtered_pc1'``, ``'filtered_pc2'``).

    :returns: None; the method only mutates ``self``.
    """
    binary, singles, real = extract_features(self.df)

    # Rows present in `real` but absent from `singles` form the background.
    background_rows = real.index.diff(singles.index)
    background_df = real.ix[background_rows].dropna()
    background = extract_pc(background_df, 0)

    # Keep only rows whose p-value exceeds the cutoff (10e-5 == 1e-4);
    # presumably this drops features tracking the background signal.
    screen = screen_feature(background['pat_vec'], pearson_pandas, singles)
    singles = singles.ix[screen.p > 10e-5]

    # Standardize each row: subtract the row mean, divide by the row std.
    row_means = singles.mean(1)
    row_stds = singles.std(1)
    singles = ((singles.T - row_means) / row_stds).T

    # The middle element of the decomposition is not used here.
    U, _, pc = frame_svd(singles)

    self.features['binary'] = binary
    self.features['real'] = singles

    top_two = (('filtered_pc1', 0), ('filtered_pc2', 1))

    self.global_vars['background'] = background['pat_vec']
    for key, position in top_two:
        self.global_vars[key] = pc[position]

    self.global_loadings['background'] = background['gene_vec']
    for key, position in top_two:
        self.global_loadings[key] = U[position]
def SVC_fill_old(feature, df):
    """
    Fill missing entries of ``feature`` with predictions from an SVM.

    Each column of ``df`` is turned into a boolean vector via
    ``to_quants(col, std=1) > 0``.  Rows of that boolean table are screened
    against ``feature`` with ``chi2_cont_test`` and those with ``p < .05``
    become the predictors.  An ``SVC`` is tuned by grid search on a 65%
    training split of the columns that also appear in ``feature.index``,
    and the best estimator is then applied to every column of ``df``.

    :param feature: pandas Series to be completed (indexed like the columns
        of ``df`` -- presumably sample/patient identifiers).
    :param df: pandas DataFrame supplying the predictor rows.
    :returns: dict with keys ``'auc'``, ``'model'``, ``'decision_function'``,
        ``'inferred_values'`` and ``'filled_feature'``.
    """
    binarized = df.apply(lambda col: to_quants(col, std=1) > 0)

    screen = screen_feature(feature, chi2_cont_test, binarized)
    significant = screen[screen.p < .05]

    # Columns for which a label is available in `feature`.
    labelled = binarized.columns.intersection(feature.index)
    predictors = binarized.ix[significant.index]

    X = predictors.ix[:, labelled].T.as_matrix()
    Y = np.array(feature.ix[labelled])

    # Only the training portion of the split is used below.
    X_train, _, y_train, _ = train_test_split(X, Y, test_size=0.35,
                                              random_state=5796543)

    rbf_grid = {'kernel': ['rbf'],
                'gamma': [0, .1, .05, .01, 1e-3, 1e-4, 1e-5],
                'C': [.1, 1, 10, 100, 1000],
                'class_weight': ['auto']}
    other_grids = [{'kernel': [kernel],
                    'C': [1, 10, 100, 1000],
                    'class_weight': ['auto']}
                   for kernel in ('linear', 'poly')]
    params = [rbf_grid] + other_grids

    # NOTE(review): `score_func=` and passing `cv=` to `fit` look like an
    # older scikit-learn API -- confirm against the pinned version.
    clf = GridSearchCV(SVC(C=1), params, score_func=auc_score)
    clf.fit(X_train, y_train, cv=5)
    best = clf.best_estimator_

    # NOTE(review): the score is computed on all labelled data (training
    # rows included), not on the held-out split -- confirm this is intended.
    auc = clf.score(X, Y)

    # Apply the tuned model to every column, labelled or not.
    mat_all = binarized.ix[predictors.index].T.as_matrix()
    inferred = pd.Series(best.predict(mat_all), index=binarized.columns)
    fun = pd.Series(best.decision_function(mat_all)[:, 0],
                    predictors.columns)

    # Align a copy of the feature to all columns, then patch the gaps with
    # the model's predictions.
    filled = feature.copy().ix[inferred.index]
    missing = filled.isnull()
    filled[missing] = inferred[missing]
    filled_feature = filled.astype(float)

    return {'auc': auc,
            'model': best,
            'decision_function': fun,
            'inferred_values': inferred,
            'filled_feature': filled_feature}