Example #1
def on_train_clicked(self, widget):
    # FOR NOW, only SVM is supported
    if self.sClassifier == "SVM":
        filen = self.getFilenameToRead("Open training data", filter='mat')
        if filen is not None:
            data = ml.VectorDataSet(filen, labelsColumn=0)
            self.clssfr = ml.SVM()
            self.clssfr.train(data)
            # Training finished; update the button states
            self.setDisabledBtns()
            self.showMessage("Training SVM is done.")
    else:
        self.showMessage("The classifier is not supported yet!")
Example #2
# [550,600,650,700] -- C: 550
# [510,520,530,540] -- C: 520
# [515,516,517,518,519,520,521,522,523,524,525] -- C: 524.000000
# possibleC =  list(np.arange(523.6,524.6,.01)) -- C: 523.830000
#

# 'vec_train_scene_TESTSTUxTESTNSU_Wed02Mar2011_20-42-01rPRC_block_svmData.txt'
# [0.01,1,10,100,500,1000] -- C: 1
# [0.1,.2,.3,2,3,4,5,6] -- C: 0.3
# [0.2,.3,.4,.5,.6,.7] -- C: 0.4
# list(np.arange(.3,.5,.01)) -- C: 0.31000 # CV RUNNING

import numpy as np
from time import strftime

import PyML as ml

trainFile = 'vec_train_word_TESTSTUxTESTNSU_Wed02Mar2011_20-42-01rPRC_block_svmData.txt'

#possibleC = [515,516,517,518,519,520,521,522,523,524,525]
possibleC = list(np.arange(523.6, 524.6, .01))
trainData = ml.VectorDataSet(trainFile, labelsColumn=1, idColumn=0)
# assumes data is csv and znormed

startTime = strftime("%a%d%b%Y_%H:%M:%S")

s = ml.SVM()
param = ml.modelSelection.Param(s, 'C', possibleC)
m = ml.modelSelection.ModelSelector(param)
m.train(trainData)

stopTime = strftime("%a%d%b%Y_%H:%M:%S")

print(startTime, stopTime)
print(m)
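The ModelSelector keeps the winning classifier, so the chosen C can be read back (Example #3 below does this via m.classifier.C). A brief follow-up sketch, assuming PyML's SVM accepts C as a keyword and exposes a cv() method: re-evaluate a fresh SVM at the chosen C.

bestC = m.classifier.C                 # the C picked by the model selector
finalSVM = ml.SVM(C=bestC)             # assumes SVM() accepts C as a keyword argument
cvResults = finalSVM.cv(trainData, 5)  # 5-fold cross-validation at the chosen C
print(bestC)
print(cvResults)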
Example #3
def optC(vecFile=''):
	"""
	A function for optimizing C (the soft margin constatn) 
	for PyML datasets and SVMs.  Optimization proceeds by
	first stepping further inside the range of
	{0.1...1} in increments of 0.1 or outside it, by 
	orders of magnitude.
	
	Max possible C values are 0.01 and 1000; Best possible 
	precision is to the first decimal place.

	PyML is very verbose, so progress in this optmization 
	is recorded in 'optC.log' in the PWD. This log file is 
	appended to across invocations and so may grow without bound.
	"""
	import PyML as ml
	import numpy as np
	from time import strftime
	
	trainData = ml.VectorDataSet(vecFile,labelsColumn=1,idColumn=0)
	log = open('optC.log','a')

	bestC = 1
	stepSize = .1
		## init with a reasonable value just outside the stop
		## criterion (stepSize < .1), so the loop runs at least once

	possibleC = np.array([.1, 1])
		## middle of the range to start; possibleC should span no
		## more than 1 power of 10, otherwise this function will
		## blow up or take an eternity
		## possibleC must be float; int breaks PyML

	log.write('\n\n\n**Beginning new optimization.**\n')
	log.write('Dataset: {0}\n'.format(vecFile))
	log.write('First set of possible C values: {0}.\n'.format(possibleC))

	while True:
		log.flush()

		## try all 'possibleC'
		startTime = strftime("%a%d%b%Y_%H:%M:%S")
		s = ml.SVM()
		param = ml.modelSelection.Param(s, 'C', possibleC)
		m = ml.modelSelection.ModelSelector(param)
		m.train(trainData)
		stopTime = strftime("%a%d%b%Y_%H:%M:%S")
		
		log.write('Start/stop times last iteration: {0}/{1}\n'.format(startTime, stopTime))
		
		bestC = m.classifier.C

		## The stop criterion is the 
		## level of precision desired.
		if stepSize < .1:
			log.write('SUCCESS. C is {0}\n'.format(bestC))
			break
		## C cannot be greater than 1000
		## or less than .01
		elif bestC > 1000 or bestC < 0.01:
			log.write('WARNING: C is out of range. C is {0}\n'.format(bestC))
			break
		else:
			## Where was best C for last iteration?
			## Use that location to define next set of 
			## possible C values.
			indexC = possibleC.tolist().index(bestC)
			log.write('Best C for last iteration: {0}.\n'.format(bestC))
			
			if possibleC[indexC] == possibleC.max():
				stepSize = round(possibleC.max())
				possibleC = np.arange(stepSize,(stepSize*10),stepSize)
				log.write('At max range, new values are: {0}.\n'.format(possibleC))
			elif indexC == 0:
				stepSize = possibleC.min()/10
				possibleC = np.arange(stepSize,possibleC.min(),stepSize)	
				log.write('At min range, new values are: {0}.\n'.format(possibleC))
			else:
				stepSize = stepSize/10
				possibleC = np.arange(possibleC[indexC-1],possibleC[indexC+1],stepSize)
				log.write('Was in range, next values are: {0}.\n'.format(possibleC))
	log.close()
	return bestC
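A short usage sketch, not part of the original: the file name is a placeholder, and the final retraining simply mirrors Example #2's pattern with the C returned by optC.

if __name__ == '__main__':
	import PyML as ml
	vecFile = 'some_svmData.txt'   # placeholder path, not a file from the original
	chosenC = optC(vecFile)
	data = ml.VectorDataSet(vecFile, labelsColumn=1, idColumn=0)
	finalSVM = ml.SVM(C=chosenC)   # assumes SVM() accepts C as a keyword argument
	finalSVM.train(data)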