Exemplo n.º 1
0
def conditional_random_fields(X, y):
	"""Fit a pystruct GraphCRF with a OneSlackSSVM learner and report results.

	Every sample in ``X`` is wrapped as a ``(features, edges)`` pair whose
	edge array is empty (shape ``(0, 2)``), so no edges are passed to the
	CRF. The data is split with ``train_test_split``, the model is fitted on
	the training part, and the mean label agreement, the classification
	report and the confusion matrix for the held-out part are printed/plotted.

	Parameters
	----------
	X : iterable of array-like
		One feature vector per sample (presumably 1-D, giving a single-node
		graph after ``np.atleast_2d`` -- confirm against the caller).
	y : numpy.ndarray
		One label per sample; reshaped to a column vector so each sample
		carries a length-1 label array.

	Returns
	-------
	None
	"""
	# ``int`` replaces the ``np.int`` alias, which was only ever the builtin
	# and no longer exists in recent NumPy releases.
	X_ = [(np.atleast_2d(x), np.empty((0, 2), dtype=int)) for x in X]
	Y = y.reshape(-1, 1)

	X_train, X_test, y_train, y_test = train_test_split(X_, Y)

	pbl = GraphCRF()
	svm = OneSlackSSVM(pbl)

	svm.fit(X_train, y_train)
	# Stack the per-sample predictions so they line up with ``y_test``.
	y_pred = np.vstack(svm.predict(X_test))
	print("Score with pystruct crf svm: %f "
	      % (np.mean(y_pred == y_test)))
	# Call form of print works on both Python 2 and Python 3.
	print(classification_report(y_test, y_pred))
	plot_confusion_matrix(y_test, y_pred)
Exemplo n.º 2
0
def main(filenames, filename_pheno, phenos):
	"""Load genotype/phenotype data, plot a PCA and evaluate a classifier.

	Genotype archives are loaded with ``load_npz`` and concatenated, merged
	with the phenotype table from ``RawDataPheno``, normalised, projected
	with ``get_PCA`` for plotting, and finally split into train/test sets on
	which a classifier is fitted and evaluated. The last entry of
	``pheno_name`` is used as the target column.

	Parameters
	----------
	filenames : iterable
		Inputs handed one by one to ``load_npz``; must not be empty.
	filename_pheno :
		Passed through to ``RawDataPheno`` (presumably the phenotype file
		path -- confirm against that class).
	phenos :
		Passed through to ``RawDataPheno`` (presumably the phenotypes to
		load -- confirm against that class).

	Returns
	-------
	None

	Raises
	------
	ValueError
		If ``filenames`` yields no entries.
	"""
	ids, X = [], []
	snps = None
	for filename in filenames:
		# NOTE(review): only the ``snps`` of the last file is kept; this
		# assumes every file describes the same SNP set -- confirm.
		ids_t, X_t, snps = load_npz(filename)
		ids.append(ids_t)
		X.append(X_t)

	if not ids:
		# np.concatenate would raise ValueError anyway; fail with a clear message.
		raise ValueError("main() needs at least one genotype file")

	ids = np.concatenate(ids, axis=0)
	X = np.concatenate(X, axis=0)

	data_pheno_ear = RawDataPheno(filename_pheno, phenos)
	df_pheno_ear = data_pheno_ear.get_pheno()
	merged_df = merge_geno_pheno(df_pheno_ear, ids, X, snps)
	# Positional slice: everything from the third column onwards is used as
	# features. ``.iloc`` replaces ``.ix``, which was removed in pandas 1.0.
	norm_x = normalization(merged_df.iloc[:, 2:])

	X_pca = get_PCA(norm_x)

	target_col = data_pheno_ear.pheno_name[-1]
	# Builtin ``int`` replaces the removed ``np.int`` alias (same dtype).
	merged_df[target_col] = merged_df[target_col].astype(int)
	y = merged_df[target_col].values
	print("Pheno working : {}".format(target_col))

	plot_PCA(X_pca, y)

	X_train, X_test, y_train, y_test = train_test_split(norm_x, y)

	# NOTE(review): the support-vector model is replaced straight away by the
	# extra-tree model, so only the latter is evaluated below. The call is
	# kept in case ``support_vector`` has side effects -- confirm intent.
	clf = support_vector(X_train, y_train)
	clf = extra_tree(X_train, y_train)

	y_pred = clf.predict(X_test)
	# The score used to be computed and thrown away; report it.
	score = clf.score(X_test, y_test)
	print("Score : {}".format(score))
	print(classification_report(y_test, y_pred))
	plot_confusion_matrix(y_test, y_pred)