def conditional_random_fields(X, y):
    """Fit a pystruct GraphCRF with a one-slack SSVM and report test metrics.

    Every sample in ``X`` is wrapped as a ``(features, edges)`` tuple: the
    features are promoted to at least 2-D and the edge array is an empty
    ``(0, 2)`` integer array, i.e. the graph of each sample has no edges.
    The data is split with ``train_test_split``, the model is fitted on the
    training part, and the score, the classification report and the
    confusion-matrix plot are produced for the held-out part.

    Parameters
    ----------
    X : iterable of array-like
        Per-sample feature vectors.
    y : numpy.ndarray
        Labels; reshaped to a column so each sample carries one label.

    Returns
    -------
    None
        Results are printed and plotted, nothing is returned.
    """
    # The builtin ``int`` is what the removed ``np.int`` alias pointed to,
    # so the dtype is unchanged while staying valid on current NumPy.
    X_ = [(np.atleast_2d(x), np.empty((0, 2), dtype=int)) for x in X]
    Y = y.reshape(-1, 1)

    X_train, X_test, y_train, y_test = train_test_split(X_, Y)

    pbl = GraphCRF()
    svm = OneSlackSSVM(pbl)
    svm.fit(X_train, y_train)

    # predict() yields one label array per sample; stack them so the result
    # lines up with the column-shaped y_test.
    y_pred = np.vstack(svm.predict(X_test))

    print("Score with pystruct crf svm: %f " % (np.mean(y_pred == y_test)))
    # Single-argument call form prints identically on Python 2 and 3.
    print(classification_report(y_test, y_pred))
    plot_confusion_matrix(y_test, y_pred)
def main(filenames, filename_pheno, phenos):
    """Load genotypes and phenotypes, then train and evaluate classifiers.

    Steps, in order:

    1. Load every genotype file with ``load_npz`` and concatenate the
       sample ids and genotype matrices along axis 0.
    2. Load the phenotypes through ``RawDataPheno`` and merge them with the
       genotypes via ``merge_geno_pheno``.
    3. Normalize the feature columns, compute and plot a PCA projection.
    4. Split the data, fit the classifiers and print/plot the evaluation.

    Parameters
    ----------
    filenames : iterable
        Genotype files handed one by one to ``load_npz``. The ``snps``
        value of the last file is the one used for the merge.
    filename_pheno :
        Phenotype source forwarded to ``RawDataPheno``.
    phenos :
        Phenotype selection forwarded to ``RawDataPheno``.

    Raises
    ------
    ValueError
        If ``filenames`` yields no file (previously raised, with a less
        helpful message, by ``np.concatenate`` on an empty list).

    Returns
    -------
    None
    """
    ids, X = [], []
    snps = None
    for path in filenames:
        ids_t, X_t, snps = load_npz(path)
        ids.append(ids_t)
        X.append(X_t)

    if not ids:
        raise ValueError("main() needs at least one genotype file in `filenames`")

    ids = np.concatenate(ids, axis=0)
    X = np.concatenate(X, axis=0)

    data_pheno_ear = RawDataPheno(filename_pheno, phenos)
    df_pheno_ear = data_pheno_ear.get_pheno()
    merged_df = merge_geno_pheno(df_pheno_ear, ids, X, snps)

    # Positional slice: everything from the third column on is treated as
    # features. NOTE(review): assumes the first two columns are the id and
    # the phenotype, and that column labels are not all integers (in which
    # case the former ``.ix`` was positional as well) -- confirm against
    # merge_geno_pheno.
    norm_x = normalization(merged_df.iloc[:, 2:])
    X_pca = get_PCA(norm_x)

    # The last configured phenotype is the prediction target.
    target = data_pheno_ear.pheno_name[-1]
    merged_df[target] = merged_df[target].astype(int)
    y = merged_df[target].values
    print("Pheno working : {}".format(target))
    # NOTE(review): a sanity check for missing labels (NaN / -1 in y) was
    # disabled here; y is used as-is.

    plot_PCA(X_pca, y)

    # conditional_random_fields(norm_x, y) is an alternative model that is
    # currently not run.
    X_train, X_test, y_train, y_test = train_test_split(norm_x, y)

    # NOTE(review): the SVM is still fitted, but its result was always
    # overwritten before use; only the extra-trees model is evaluated below.
    support_vector(X_train, y_train)
    clf = extra_tree(X_train, y_train)

    y_pred = clf.predict(X_test)
    # The score used to be computed and silently dropped; report it.
    print("Score: {}".format(clf.score(X_test, y_test)))
    print(classification_report(y_test, y_pred))
    plot_confusion_matrix(y_test, y_pred)