示例#1
0
	def train(self,samples):
		"""Train an SVM on the features that appear in CPAR association rules.

		Steps, in order:
		  1. Run a CPARTrainer on ``samples`` to obtain a rule set.
		  2. Collect every item found in any rule's ``ls`` attribute.
		  3. Reduce ``samples`` to those items via ``samples.feature_select``
		     and restore the class that was current on ``samples``.
		  4. Train a libSVMTrainer on the reduced sample set.

		Arguments:
		  samples -- sample set to train on; must provide
		             ``get_current_class()`` and ``feature_select()``.

		Returns the value returned by ``libSVMTrainer.train`` for the
		feature-selected samples (the SVM model), NOT the CPAR rule set.
		"""
		current_class = samples.get_current_class()

		# Purely informational: both trainers are built from self.parameters
		# whichever branch is taken here.
		if self.parameters is None:
			print("Using standard parameters. ")
		elif os.path.isfile(self.parameters):
			print("TRAINER FOUND PARAMETERS FROM %s"%(self.parameters))
		else:
			print("Trainer DID NOT find %s"%(self.parameters))
		svm_trainer = libSVMTrainer(self.parameters)
		cpar_trainer = CPARTrainer(self.parameters)

		arset = cpar_trainer.train(samples)
		print("Found %d rules!"%(len(arset)))

		# Kept as a dict (rather than a set) so the key order handed to
		# feature_select() is exactly what it has always been.
		distinct_items = {}
		for rule in arset:
			for item in rule.ls:
				distinct_items[item] = 1
		print("Found %d distinct items"%(len(distinct_items)))

		sample_set_feature_selected = samples.feature_select(list(distinct_items))
		# The feature-selected set is a different object from ``samples``,
		# so the current class is set on it explicitly.
		sample_set_feature_selected.set_current_class(current_class)

		# Statistics for the log line below only; they do not affect training.
		non_zero_features = set()
		class_labels = set()
		for sample in sample_set_feature_selected:
			# nonzero(...)[0] -- presumably numpy.nonzero, giving the indices
			# of the non-zero entries along the first axis; TODO confirm.
			non_zero_features.update(int(item) for item in nonzero(sample.get_attribute_matrix())[0])
			class_labels.add(sample.get_class_label())
		print("Using %d features with SVM classifier over %s (%d) class labels."%(len(non_zero_features),str(list(class_labels)),len(class_labels)))

		model = svm_trainer.train(sample_set_feature_selected)

		return model
示例#2
0
	
	# Cross-validation driver fragment: prepares the sample set, builds one
	# TestConfiguration per feature-scoring scheme, and runs CrossValidation.
	# ``samples`` and ``options`` are defined before the visible lines.
	samples.set_current_class(options.target_class)
	print "Parameters from %s"%(options.parameters)
	# Trailing comma suppresses the newline so the next print continues
	# on the same output line.
	print "Compressing features...",
	samples = samples.compress_features()
	print "compressed to %d distinct features."%(samples.get_number_of_features())
	
	# ``samples`` was rebound to the result of compress_features(), so the
	# target class is set again on the new object.
	# NOTE(review): presumably compress_features() does not carry the current
	# class over -- confirm whether this second call is required.
	samples.set_current_class(options.target_class)
	# NOTE(review): presumably hides samples that have no value for the
	# target class -- confirm against hide_nulls().
	samples.hide_nulls(options.target_class)
	
	test_configurations = []
	# NOTE(review): ``confounders`` is not referenced again in the visible
	# lines; the selectors below receive options.confounder instead.
	confounders = ("genus","family","order","class","phylum","superkingdom")
	# Each tuple is passed as the ``scores`` argument of one feature selector.
	scores_list = (("cmi",),("cwmi",),("mi","cwmi"))
	
	# First configuration, named "mi": selector scored with "mi" only.
	feature_selector = CWMIRankFeatureSelector(confounders_filename=options.parameters,scores=("mi",),features_per_class=options.features_per_class,confounder=options.confounder)
	# The same trainer and classifier instances are shared by every
	# configuration created below.
	trainer = libSVMTrainer(kernel_type="LINEAR",C=5)
	classifier = libSVMClassifier()
	tc = TestConfiguration("mi",feature_selector,trainer,classifier)
	test_configurations.append(tc)
	

	# One further configuration per entry of scores_list, named
	# "<scores joined by '_'>_<confounder>".
	for scores in scores_list:
		feature_selector = CWMIRankFeatureSelector(features_per_class=options.features_per_class,confounder=options.confounder,scores=scores,confounders_filename=options.parameters)
		tc = TestConfiguration("%s_%s"%("_".join(scores),options.confounder),feature_selector,trainer,classifier)
		test_configurations.append(tc)

	# NOTE(review): ``root`` is not used in the visible lines (CrossValidation
	# is given options.output_filename as root_output), and
	# options.features_per_class fills both the "p" and the "n" slot --
	# confirm whether either is intended.
	root = "%s_%s_p%dn%d"%(options.target_class,options.confounder,options.features_per_class,options.features_per_class)
	
	# Run every collected configuration through cross-validation.
	crossvalidator = CrossValidation(samples,options.parameters,options.folds,options.replicates,test_configurations,root_output=options.output_filename)
	crossvalidator.crossvalidate()