Exemplo n.º 1
0
def fusion_outputs(known_dataset, known_targets, train_index, test_index, fusion_algorithm, ids, algorithm, ind):
	"""Fuse one fold's predictions with ``fusion_algorithm`` and score the result.

	Args:
		known_dataset, known_targets, train_index, test_index, ids, algorithm, ind:
			Forwarded unchanged to ``combine_predictions_one_fold_using_majority``
			('maj' / 'wmaj') or to ``svm_fusion`` ('svm').
		fusion_algorithm: One of 'maj', 'wmaj' or 'svm'. 'nn' is recognised but
			has no implementation.

	Returns:
		``(error, f1, misclassified_ids, (hp, hr, hf), (cp, cr, cf))`` where
		``error`` is the mean of the squared differences between the fused
		predictions and ``y_test``, ``f1`` comes from ``f1_score`` and the two
		triples are unpacked from ``measures(y_test, combined_predictions)``
		(presumably per-class precision / recall / F-score -- confirm against
		``measures``).

	Raises:
		NotImplementedError: ``fusion_algorithm`` is 'nn'.
		ValueError: ``fusion_algorithm`` is none of the known names.
	"""
	# Fail fast: previously these two cases only printed a message and then
	# crashed further down while scoring empty lists.
	if fusion_algorithm == 'nn':
		raise NotImplementedError('not done')
	if fusion_algorithm not in ('maj', 'wmaj', 'svm'):
		raise ValueError('Error parsing algorithm: %r' % (fusion_algorithm,))

	if fusion_algorithm == 'svm':
		y_test, _, combined_predictions, misclassified_ids = svm_fusion(known_dataset, known_targets, train_index, test_index, ids, algorithm, ind)
	else:
		predictions, y_test, accuracies, misclassified_ids = combine_predictions_one_fold_using_majority(known_dataset, known_targets, train_index, test_index, ids, algorithm, ind)
		if fusion_algorithm == 'maj':
			combined_predictions = majority_vote(predictions, y_test, accuracies)
		else:
			# The weights returned alongside the predictions are not needed here.
			combined_predictions, _ = weighted_majority(predictions, y_test)

	(hp, hr, hf), (cp, cr, cf) = measures(y_test, combined_predictions)

	# NOTE(review): the subtraction assumes array-like operands (e.g. NumPy
	# arrays) -- confirm what the fusion helpers return.
	error = float(sum((combined_predictions - y_test) ** 2)) / len(y_test)
	# Ground truth first, matching the other f1_score calls in this file.
	f1 = f1_score(y_test, combined_predictions)
	return error, f1, misclassified_ids, (hp, hr, hf), (cp, cr, cf)
Exemplo n.º 2
0
def lr_one_fold_measures_feature_selection(X_train, X_test, y_train, y_test):
	"""Train the ``lr_feature_selection`` model on one fold and score it.

	Args:
		X_train, y_train: Training split handed to ``lr_feature_selection``.
		X_test, y_test: Held-out split used for prediction and scoring.

	Returns:
		``(error_rate, f1, model, (hp, hr, hf), (cp, cr, cf))`` where
		``error_rate`` is the mean of the squared differences between the
		predictions and ``y_test``, ``f1`` comes from ``f1_score`` and the two
		triples are unpacked from ``measures(y_test, y_pred)`` (presumably
		per-class precision / recall / F-score -- confirm against ``measures``).
	"""
	fitted = lr_feature_selection(X_train, y_train)
	predicted = fitted.predict(X_test)

	# Mean squared difference between predictions and targets.
	squared_diffs = (predicted - y_test)**2
	fold_error = float(sum(squared_diffs)) / len(y_test)

	fold_f1 = f1_score(y_test, predicted)
	(hp, hr, hf), (cp, cr, cf) = measures(y_test, predicted)

	return fold_error, fold_f1, fitted, (hp, hr, hf), (cp, cr, cf)
Exemplo n.º 3
0
def ensemble_one_fold_measures(X_train, X_test, X_train_scaled, X_test_scaled, y_train, y_test, dt, knn, svm):
	"""Train three classifiers on one fold and score their per-sample vote.

	Args:
		X_train, X_test: Unscaled splits, used by the ``dt`` model.
		X_train_scaled, X_test_scaled: Scaled splits, used by ``knn`` and ``svm``.
		y_train, y_test: Targets for the training and test splits.
		dt, knn, svm: Callables taking ``(X, y)`` and returning an object with
			a ``predict`` method.

	Returns:
		``(error_rate, f1, (hp, hr, hf), (cp, cr, cf))`` where ``error_rate`` is
		the mean of the squared differences between the voted predictions and
		``y_test``, ``f1`` comes from ``f1_score`` and the two triples are
		unpacked from ``measures(y_test, y_pred)`` (presumably per-class
		precision / recall / F-score -- confirm against ``measures``).
	"""
	# Each model is fitted and queried in turn: dt, then knn, then svm.
	votes_dt = dt(X_train, y_train).predict(X_test)
	votes_knn = knn(X_train_scaled, y_train).predict(X_test_scaled)
	votes_svm = svm(X_train_scaled, y_train).predict(X_test_scaled)

	n_samples = len(votes_dt)
	assert n_samples == len(votes_knn)
	assert n_samples == len(votes_svm)

	# For every sample keep the label that most of the three models chose.
	y_pred = [
		Counter([votes_dt[k], votes_knn[k], votes_svm[k]]).most_common(1)[0][0]
		for k in range(n_samples)
	]

	# y_pred is a plain list, so the subtraction relies on y_test supporting
	# reflected subtraction (e.g. a NumPy array) -- assumption, confirm with callers.
	squared_diffs = (y_pred - y_test)**2
	fold_error = float(sum(squared_diffs)) / len(y_test)
	fold_f1 = f1_score(y_test, y_pred)
	(hp, hr, hf), (cp, cr, cf) = measures(y_test, y_pred)
	return fold_error, fold_f1, (hp, hr, hf), (cp, cr, cf)
def fusion(theme, algorithm, training_data, training_targets, testing_data, testing_targets, fusion_algorithm, ind=False):
	"""Train one model per theme, fuse their test predictions and score the result.

	Args:
		theme: Passed to ``weighted_majority_theme`` when ``fusion_algorithm``
			is "wmaj"; unused otherwise.
		algorithm: Callable ``(X, y) -> model`` used for every theme when
			``ind`` is false.
		training_data, testing_data: Sequences indexed by theme number.
		training_targets, testing_targets: Targets shared by all themes.
		fusion_algorithm: "maj", "wmaj" or "svm".
		ind: When true, theme 0 / 1 / 2 is trained with ``svm_selected_net`` /
			``svm_selected_ill`` / ``svm_selected_ideo`` instead of ``algorithm``.

	Returns:
		``(error_rate, (hp, hr, hf), (cp, cr, cf))`` where ``error_rate`` is the
		mean of the squared differences between the fused predictions and
		``testing_targets`` and the two triples are unpacked from
		``measures(testing_targets, combined_predictions)`` (presumably
		per-class precision / recall / F-score -- confirm against ``measures``).

	Raises:
		ValueError: ``fusion_algorithm`` is not a known name, or ``ind`` is true
			and a theme index has no dedicated SVM builder.
	"""
	# Validate before training: an unknown name used to print 'ERROR' and then
	# fail with a NameError on the unbound ``combined_predictions``.
	if fusion_algorithm not in ("maj", "wmaj", "svm"):
		raise ValueError('unknown fusion_algorithm: %r' % (fusion_algorithm,))

	models = []
	for i in range(NR_THEMES):
		if not ind:
			model = algorithm(training_data[i], training_targets)
		elif i == 0:
			model = svm_selected_net(training_data[i], training_targets)
		elif i == 1:
			model = svm_selected_ill(training_data[i], training_targets)
		elif i == 2:
			model = svm_selected_ideo(training_data[i], training_targets)
		else:
			# Previously the model of the preceding theme was silently reused.
			raise ValueError('no theme-specific SVM for theme index %d' % i)
		models.append(model)

	predictions = [model.predict(testing_data[i]) for i, model in enumerate(models)]
	# Only the first three themes are stacked for the fusion step.
	predictions = np.array((predictions[0], predictions[1], predictions[2]), dtype=float)

	if fusion_algorithm == "maj":
		combined_predictions = majority_vote(predictions, testing_targets, [])
	elif fusion_algorithm == "wmaj":
		combined_predictions = weighted_majority_theme(theme, predictions)
	else:
		combined_predictions = svm_vote(predictions, testing_targets)

	print('PRED ' + str(combined_predictions))
	print('TEST ' + str(testing_targets))

	(hp, hr, hf), (cp, cr, cf) = measures(testing_targets, combined_predictions)
	# NOTE(review): the subtraction assumes array-like operands (e.g. NumPy
	# arrays) -- confirm what the fusion helpers return.
	error_rate = float(sum((combined_predictions - testing_targets) ** 2)) / len(testing_targets)
	return error_rate, (hp, hr, hf), (cp, cr, cf)
Exemplo n.º 5
0
def fusion(training_data, training_data_scaled, training_targets, testing_data, testing_data_scaled, testing_targets, fusion_algorithm):
	"""Fuse per-theme predictions per classifier family, then vote across families.

	Three families are trained on three themes each: ``dt`` on the unscaled
	data, ``knn`` on the scaled data and the theme-specific
	``svm_selected_*`` builders on the scaled data. Within each family the
	per-theme predictions are fused with ``fusion_algorithm``; the three fused
	vectors are then combined per sample by taking the most common label.

	Args:
		training_data, testing_data: Unscaled data, indexed by theme number.
		training_data_scaled, testing_data_scaled: Scaled data, indexed by
			theme number.
		training_targets, testing_targets: Targets shared by all themes.
		fusion_algorithm: "maj", "wmaj" or "svm".

	Returns:
		``(error_rate, (hp, hr, hf), (cp, cr, cf))`` where ``error_rate`` is the
		mean of the squared differences between the final vote and
		``testing_targets`` and the two triples are unpacked from
		``measures(testing_targets, combined_predictions)`` (presumably
		per-class precision / recall / F-score -- confirm against ``measures``).

	Raises:
		ValueError: ``fusion_algorithm`` is not a known name.
	"""
	# Validate before training nine models: an unknown name used to print
	# 'ERROR' and then fail with a NameError on ``combined_predictions_dt``.
	if fusion_algorithm not in ("maj", "wmaj", "svm"):
		raise ValueError('unknown fusion_algorithm: %r' % (fusion_algorithm,))

	models_dt = [
		dt(training_data[0], training_targets),
		dt(training_data[1], training_targets),
		dt(training_data[2], training_targets),
	]
	models_knn = [
		knn(training_data_scaled[0], training_targets),
		knn(training_data_scaled[1], training_targets),
		knn(training_data_scaled[2], training_targets),
	]
	models_svm = [
		svm_selected_net(training_data_scaled[0], training_targets),
		svm_selected_ill(training_data_scaled[1], training_targets),
		svm_selected_ideo(training_data_scaled[2], training_targets),
	]

	predictions_dt = []
	predictions_knn = []
	predictions_svm = []
	for i in range(NR_THEMES):
		predictions_dt.append(models_dt[i].predict(testing_data[i]))
		predictions_knn.append(models_knn[i].predict(testing_data_scaled[i]))
		predictions_svm.append(models_svm[i].predict(testing_data_scaled[i]))

	predictions_dt = np.array((predictions_dt[0], predictions_dt[1], predictions_dt[2]), dtype=float)
	predictions_knn = np.array((predictions_knn[0], predictions_knn[1], predictions_knn[2]), dtype=float)
	predictions_svm = np.array((predictions_svm[0], predictions_svm[1], predictions_svm[2]), dtype=float)

	if fusion_algorithm == "maj":
		combined_predictions_dt = majority_vote(predictions_dt, testing_targets, [])
		combined_predictions_knn = majority_vote(predictions_knn, testing_targets, [])
		combined_predictions_svm = majority_vote(predictions_svm, testing_targets, [])
	elif fusion_algorithm == "wmaj":
		combined_predictions_dt = weighted_majority_theme('dt', predictions_dt)
		combined_predictions_knn = weighted_majority_theme('knn', predictions_knn)
		combined_predictions_svm = weighted_majority_theme('svm', predictions_svm)
	else:
		# NOTE(review): svm_vote is called with a single argument here, while
		# another fusion variant passes the testing targets as well -- confirm
		# which signature svm_vote actually has.
		combined_predictions_dt = svm_vote(predictions_dt)
		combined_predictions_knn = svm_vote(predictions_knn)
		combined_predictions_svm = svm_vote(predictions_svm)

	# Final decision per sample: the label most of the three families agree on.
	combined_predictions = [
		Counter([combined_predictions_dt[i], combined_predictions_knn[i], combined_predictions_svm[i]]).most_common(1)[0][0]
		for i in range(len(combined_predictions_dt))
	]

	print('predictions DT ' + str(predictions_dt))
	print('combined predictions DT ' + str(combined_predictions_dt))
	print('predictions KNN ' + str(predictions_knn))
	print('combined predictions KNN ' + str(combined_predictions_knn))
	print('predictions SVM ' + str(predictions_svm))
	print('combined predictions SVM ' + str(combined_predictions_svm))

	print('PRED ' + str(combined_predictions))
	print('TEST ' + str(testing_targets))

	(hp, hr, hf), (cp, cr, cf) = measures(testing_targets, combined_predictions)
	# combined_predictions is a plain list, so the subtraction relies on
	# testing_targets supporting reflected subtraction (e.g. a NumPy array)
	# -- assumption, confirm with callers.
	error_rate = float(sum((combined_predictions - testing_targets) ** 2)) / len(testing_targets)
	return error_rate, (hp, hr, hf), (cp, cr, cf)
Exemplo n.º 6
0
def fusion_outputs_ensemble(known_dataset, known_targets, known_dataset_scaled, dt, knn, svm, fusion_algorithm, train_index, test_index, ids):
	"""Fuse one fold per classifier, vote across the three classifiers and score.

	For each of ``dt`` (unscaled data), ``knn`` (scaled data) and ``svm``
	(scaled data, ``ind=True``) the fold is fused with ``fusion_algorithm``;
	the three fused prediction vectors are then combined per sample by taking
	the most common label.

	Args:
		known_dataset, known_dataset_scaled: Unscaled and scaled datasets.
		known_targets, train_index, test_index, ids: Forwarded unchanged to the
			fold helpers.
		dt, knn, svm: Classifier arguments forwarded to the fold helpers.
		fusion_algorithm: 'maj', 'wmaj' or 'svm'. 'nn' is recognised but has
			no implementation.

	Returns:
		``(error, f1, misclassified_ids, (hp, hr, hf), (cp, cr, cf))``.
		``y_test`` and ``misclassified_ids`` are the ones produced by the last
		(svm) helper call, since each call overwrites the previous values.
		``error`` is the mean of the squared differences between the final
		vote and ``y_test``; the two triples are unpacked from
		``measures(y_test, combined_predictions)`` (presumably per-class
		precision / recall / F-score -- confirm against ``measures``).

	Raises:
		NotImplementedError: ``fusion_algorithm`` is 'nn'.
		ValueError: ``fusion_algorithm`` is none of the known names.
	"""
	# Fail fast: previously these two cases only printed a message and then
	# crashed further down while scoring empty lists.
	if fusion_algorithm == 'nn':
		raise NotImplementedError('not done')
	if fusion_algorithm not in ('maj', 'wmaj', 'svm'):
		raise ValueError('Error parsing algorithm: %r' % (fusion_algorithm,))

	# (dataset, classifier, ind) in the order the helpers must be invoked.
	classifier_runs = (
		(known_dataset, dt, False),
		(known_dataset_scaled, knn, False),
		(known_dataset_scaled, svm, True),
	)

	fused_per_classifier = []
	for dataset, classifier, use_ind in classifier_runs:
		if fusion_algorithm == 'svm':
			y_test, _, fused, misclassified_ids = svm_fusion(dataset, known_targets, train_index, test_index, ids, classifier, ind=use_ind)
		else:
			predictions, y_test, accuracies, misclassified_ids = combine_predictions_one_fold_using_majority(dataset, known_targets, train_index, test_index, ids, classifier, ind=use_ind)
			if fusion_algorithm == 'maj':
				fused = majority_vote(predictions, y_test, accuracies)
			else:
				# The weights returned alongside the predictions are not needed here.
				fused, _ = weighted_majority(predictions, y_test)
		fused_per_classifier.append(fused)

	combined_predictions_dt, combined_predictions_knn, combined_predictions_svm = fused_per_classifier
	assert len(combined_predictions_dt) == len(combined_predictions_knn)
	assert len(combined_predictions_dt) == len(combined_predictions_svm)

	# Final decision per sample: the label most of the three classifiers agree on.
	combined_predictions = [
		Counter([combined_predictions_dt[i], combined_predictions_knn[i], combined_predictions_svm[i]]).most_common(1)[0][0]
		for i in range(len(combined_predictions_dt))
	]

	(hp, hr, hf), (cp, cr, cf) = measures(y_test, combined_predictions)

	# combined_predictions is a plain list, so the subtraction relies on y_test
	# supporting reflected subtraction (e.g. a NumPy array) -- assumption,
	# confirm with callers.
	error = float(sum((combined_predictions - y_test) ** 2)) / len(y_test)
	# Ground truth first, matching the other f1_score calls in this file.
	f1 = f1_score(y_test, combined_predictions)
	return error, f1, misclassified_ids, (hp, hr, hf), (cp, cr, cf)