Example #1
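This function appears to drive model selection: it sweeps the classifier and feature-selector parameter combinations defined in the config, fits the classifier on each selected training set, accumulates timing and confusion-based metrics in Stats objects, and saves the resulting feature sets to disk.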
import os
import types

# Module-level helpers assumed imported elsewhere in this project:
# config, utils, Stats, stat_header.
def model_selection(feature_selector, classifier, dataset, write_filename=None):

	feature_selector.attach_dataset(dataset)

	feature_sel_name = feature_selector.__class__.__name__
	classifier_name = classifier.__class__.__name__

	base_filename = write_filename

	# Execution mode: 'manytomany' runs every selector parameter set per
	# classifier parameter set; 'one2one' consumes one selector set each time.
	exec_mode = config.get('model_classifier_params').get('_exec_mode', 'manytomany')


	# Selector parameters: an iterator under '_iter', or a single parameter
	# dict wrapped in a list ([{}] if nothing is configured).
	sel_config = config.get('model_selector_params').get(feature_sel_name)
	if sel_config is not None and '_iter' in sel_config:
		_sel_params = sel_config.get('_iter')
	else:
		_sel_params = [sel_config] if sel_config is not None else [{}]


	cl_params_list = config.get('model_classifier_params').get(classifier_name)
	if cl_params_list is None:
		cl_params_list = [{}]

	all_classifier_stats = Stats()
	for cl_param in cl_params_list:

		classifier_stats = Stats()
			
		if exec_mode == 'one2one':
			# '_iter' must be an iterator here; take one parameter set per classifier run
			sel_params = [next(_sel_params)]
		else:
			sel_params = _sel_params

		for sel_param in sel_params:

			feature_selector.select(sel_param)
			best = []  # feature sets to save at the end of each run
			f_idx = 0

			for data in feature_selector.training_data():
				f_idx += 1

				if isinstance(data, types.GeneratorType):
					# data may be a generator of folds: fit on each item and
					# let the selector aggregate the per-item stats.
					for pair in data:
						stats = Stats.run_timed(lambda: classifier.fit(pair, cl_param))
						feature_selector.eval(stats)
					stats = feature_selector.eval_set()
				else:
					stats = Stats.run_timed(lambda: classifier.fit(data, cl_param))
					# Record the selected feature names (index of the first frame)
					stats.add_metric(list(data[0].axes[0].values), '_features')


				if base_filename is not None:
					write_filename = base_filename+'_'+classifier.desc()+'_'+feature_selector.desc()

				classifier_stats.add_classifier_stat(stats)
				
				print('Finished model selection with classifier "{}" on set {} of feature selector "{}"'.format(classifier.desc(), f_idx, feature_selector.desc()))
				
				stats.set_printheader(stat_header({'FeatureCount': len(stats.metrics['_features'][0]), 'Set':f_idx}, classifier.desc()))
				# cond = lambda s: s.conf_based_stats()[1] > .70 and (1-s.conf_based_stats()[2]) < .35
				cond = None
				stats.mystats(filename=write_filename, cond=cond)

				## Thresholding for good feature-set selection (disabled): uncomment
				## to keep only sets passing the accuracy/error bounds below.
				# if stats.conf_based_stats()[1] > .70 and (1 - stats.conf_based_stats()[2]) < .35:
				best = [stats.metrics['_features'][0]]

				stats.add_metric(stats.conf_based_stats()[0], 'Accuracy')
				stats.add_metric(stats.conf_based_stats()[3], 'f1-score')


				# Build the filename for the best-features output
				if write_filename is not None:
					# classifier_stats.classifier_stats(filename=utils.replace_with_(write_filename), title='{} \n {}'.format(classifier.desc(), feature_selector.desc()))
					best_filename = config.get('best_features_file') + '_' + utils.replace_with_(write_filename)
				else:
					best_filename = utils.replace_with_(config.get('best_features_file'))

				#save the best features to file
				if len(best) > 0:
					utils.save_string_data(os.path.join(config.get('output_dir'), best_filename), best)



		all_classifier_stats.add_classifiers_stat(classifier_stats, classifier.desc())

	return all_classifier_stats
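The config contents these functions read are not shown in the listing. Below is a minimal sketch of the layout model_selection appears to expect; the concrete class names and parameter values are hypothetical, only the keys come from the code above.

# Hypothetical config layout, inferred from the lookups in model_selection.
# In 'one2one' mode, '_iter' must be an iterator, since next() is called on it.
config_data = {
	'model_classifier_params': {
		'_exec_mode': 'manytomany',                  # or 'one2one'
		'SVMClassifier': [{'C': 1.0}, {'C': 10.0}],  # hypothetical classifier params
	},
	'model_selector_params': {
		'PCASelector': {'_iter': iter([{'n': 10}, {'n': 20}])},  # hypothetical selector params
	},
	'best_features_file': 'best_features',
	'output_dir': 'output',
}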
Example #2
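This function mirrors model_selection but evaluates on held-out data: for each parameter combination it fits the classifier on a training split, predicts on the matching test split, and records confusion-matrix statistics.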
def test(feature_selector, classifier, dataset, write_filename=None):

	feature_selector.attach_dataset(dataset)

	base_filename = write_filename

	feature_sel_name = feature_selector.__class__.__name__
	classifier_name = classifier.__class__.__name__

	# Execution mode: 'manytomany' runs every selector parameter set per
	# classifier parameter set; 'one2one' consumes one selector set each time.
	exec_mode = config.get('test_classifier_params').get('_exec_mode', 'manytomany')


	# Selector parameters: an iterator under '_iter', or a single parameter
	# dict wrapped in a list ([{}] if nothing is configured).
	sel_config = config.get('test_selector_params').get(feature_sel_name)
	if sel_config is not None and '_iter' in sel_config:
		_sel_params = sel_config.get('_iter')
	else:
		_sel_params = [sel_config] if sel_config is not None else [{}]


	cl_params_list = config.get('test_classifier_params').get(classifier_name)
	if cl_params_list is None:
		cl_params_list = [{}]

	all_classifier_stats = Stats()
	for cl_param in cl_params_list:

		classifier_stats = Stats()
			
		if exec_mode == 'one2one':
			# '_iter' must be an iterator here; take one parameter set per classifier run
			sel_params = [next(_sel_params)]
		else:
			sel_params = _sel_params

		for sel_param in sel_params:

			feature_selector.select(sel_param)

			# Test splits are consumed in lockstep with the training splits below
			test_data_gen = feature_selector.test_data()

			f_idx = 0
			for data in feature_selector.training_data():
				f_idx += 1

				# Time the fit; only the prediction stats below are recorded,
				# the fit timing (st) is discarded.
				st = Stats.run_timed(lambda: classifier.fit(data, cl_param))

				if base_filename is not None:
					write_filename = base_filename+'_'+classifier.desc()+'_'+feature_selector.desc()

				
				print('Finished testing with classifier "{}" with feature selector "{}"'.format(classifier.desc(), feature_selector.desc()))

				test_data = next(test_data_gen)
				stats = classifier.predict(test_data[0])
				stats.set_printheader(stat_header({'FeatureCount':data[0].shape[0], 'Set':f_idx}, classifier.desc()))
				stats.record_confusion_matrix(test_data[1])

				# Writes stats to a file/console; cond can optionally filter which runs are written
				# cond = lambda s: s.conf_based_stats()[0] > .6
				cond = None
				idents = test_data[0].axes[1].values
				stats.mystats(filename=write_filename, cond=cond, dataset=dataset, ids=idents)

				classifier_stats.add_classifier_stat(stats)

		all_classifier_stats.add_classifiers_stat(classifier_stats, classifier.desc())

	return all_classifier_stats
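Neither function is invoked in the listing; below is a hypothetical driver, assuming selector and classifier classes that implement the interface used above (attach_dataset, select, training_data, test_data, desc on the selector; fit, predict, desc on the classifier). PCASelector, SVMClassifier and load_dataset are placeholders, not names from the source.

# Hypothetical usage sketch; all three constructors are placeholders.
selector = PCASelector()
clf = SVMClassifier()
dataset = load_dataset()

train_stats = model_selection(selector, clf, dataset, write_filename='run1')
test_stats = test(selector, clf, dataset, write_filename='run1_test')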