Пример #1
0
def saveModelStuff(model, modelType, Xtest, ytest, Xtrain, ytrain, filename, clustered=False):
    """Score a fitted model on the test and train sets and save the results.

    A new sub-folder named ``util.string_appendDateAndTime(modelType)`` is
    created under ``learn_util.makePathToTrajectories(filename)`` and filled
    with:

    * ``ACTUALS-TEST.txt``     -- ``ytest``
    * ``PREDICTIONS-TEST.txt`` -- ``model.predict(Xtest)``
    * ``SCORE-TEST.txt``       -- ``model.score(Xtest, ytest)``
    * ``SCORE-TRAIN.txt``      -- ``model.score(Xtrain, ytrain)``
    * ``MODEL``                -- the model itself, written with ``joblib.dump``

    Args:
        model: Fitted estimator exposing ``predict`` and ``score``.
        modelType: Short label for the model (e.g. ``'SVM'``); used for the
            output folder name and in the summary line.
        Xtest: Test features; must expose ``.shape``.
        ytest: Test targets.
        Xtrain: Train features, used only for the train-score sanity check.
        ytrain: Train targets.
        filename: Passed to ``learn_util.makePathToTrajectories`` to locate
            the output root.
        clustered: Accepted for caller compatibility; not used here.

    Returns:
        None. When ``Xtest`` has no rows, a message is printed and nothing is
        scored or written.
    """
    # A test set is empty when it has zero rows, whatever its column count:
    # shape (0, n) must be caught as well as the 1-D shape (0,).
    testShape = Xtest.shape
    if len(testShape) > 0 and testShape[0] == 0:
        print("Xtest is empty")
        return

    print("Done fitting model, getting predictions...", time.ctime())
    predictions = model.predict(Xtest)
    print ("Done with predictions, scoring...", time.ctime())
    score = model.score(Xtest, ytest)
    print ("Scoring check...", time.ctime())
    # Train-set score: a quick check for under/over-fitting next to the test score.
    check = model.score(Xtrain, ytrain)

    print("Done with all testing, saving outputs.", time.ctime())
    subfolder = util.string_appendDateAndTime(modelType) + '/'
    path = learn_util.makePathToTrajectories(filename) + subfolder
    # Create the folder without a check-then-create race: attempt the creation
    # and only re-raise if the directory still does not exist afterwards.
    try:
        os.makedirs(path)
    except OSError:
        if not os.path.isdir(path):
            raise

    np.savetxt(path + 'ACTUALS-TEST.txt', ytest)
    np.savetxt(path + 'PREDICTIONS-TEST.txt', predictions)
    np.savetxt(path + 'SCORE-TEST.txt', np.array([score]))
    np.savetxt(path + 'SCORE-TRAIN.txt', np.array([check]))

    joblib.dump(model, path + 'MODEL')
    print('model ', modelType, ': score = ', score, 'train_score = ', check)
Пример #2
0
def printClusters(clusterToData, clusterStats, est, dataType, featureList):
	"""Print a report for every cluster and dump the report to a JSON file.

	For each cluster the per-feature statistics are printed, followed by the
	members of the cluster. Everything printed is also collected in a dict
	(starting from a deep copy of ``clusterToData``, so the caller's data is
	not modified) which is written with ``util.dumpJSONDict``.

	Args:
		clusterToData: Mapping of cluster name (a string) to the list of
			members of that cluster.
		clusterStats: Mapping of cluster name to a mapping of feature name
			to its statistic: a number for features in
			``c.KMEANS_NUM_FEATURES`` / ingredient constraints, otherwise
			a mapping of value name to probability.
		est: Not used by this function; kept for caller compatibility.
		dataType: ``c.KMEANS_RECIPE_DATATYPE`` or ``c.KMEANS_ALIAS_DATATYPE``;
			selects the output folder.
		featureList: Feature names used for the clustering; they are joined
			with ``&`` to build the output file name.

	Raises:
		ValueError: If ``dataType`` is neither of the two known data types.
	"""
	# print(...) with a single argument behaves the same as the Python 2
	# print statement and is also valid Python 3.
	dict2dump = copy.deepcopy(clusterToData)
	numtypeFeatures = getListIntersections(featureList, c.KMEANS_NUM_FEATURES)

	for cluster, recipeList in clusterToData.items():
		print(10 * '-' + cluster + ' ' + 10 * '-')
		clusterData = clusterStats[cluster]

		for f, v in clusterData.items():
			if f in numtypeFeatures or isIngredientContraint(f):
				# Undo the scaling applied to these two features
				# before reporting their averages.
				if f == c.KMEANS_FEATURE_TOTALTIME:
					v /= TOTAL_TIME_ADJUSTMENT
				if f == c.KMEANS_FEATURE_NUM_INGREDIENTS:
					v /= NUM_INGREDIENT_ADJUSTMENT
				dict2dump[cluster + ' ' + f + ' average score'] = v
				print("The cluster's %s average score was: %f" % (f, v))
			else:
				dict2dump[cluster + ' ' + f + ' distribution'] = v
				print("The cluster's %s distribution was: " % f)
				for name, prob in v.items():
					print("    %s: %f%%" % (name, prob))

		# print('') emits a single blank line, like a bare print statement.
		print('')
		for recipe in recipeList:
			print(recipe)
		print('')

	dict2dump['command-line'] = COMMAND_LINE

	# e.g. "<dataType>-cluster-<feat1>&<feat2>"
	tempFilename = dataType + '-cluster-' + '&'.join(featureList)
	toFilename = util.string_appendDateAndTime(tempFilename)

	if dataType == c.KMEANS_RECIPE_DATATYPE:
		jsonToFilePath = os.path.join(c.PATH_TO_ROOT, "res/kmeans-results/recipe", toFilename)
	elif dataType == c.KMEANS_ALIAS_DATATYPE:
		jsonToFilePath = os.path.join(c.PATH_TO_ROOT, "res/kmeans-results/alias", toFilename)
	else:
		# Previously this fell through and failed with an unbound-variable
		# error on jsonToFilePath; report the real problem instead.
		raise ValueError(
			"Unknown dataType %r: expected c.KMEANS_RECIPE_DATATYPE or "
			"c.KMEANS_ALIAS_DATATYPE" % (dataType,))

	util.dumpJSONDict(jsonToFilePath, dict2dump)
	print('Clustering dumped into: ' + jsonToFilePath)
def dumpAliasDataToJSONFiles(aliasData):
	"""Write ``aliasData`` as JSON into the "aliasdata" resources folder.

	The file name is ``util.string_appendDateAndTime("aliasData")`` and the
	file is placed under ``c.PATH_TO_RESOURCES/aliasdata``; the actual
	writing is delegated to ``util.dumpJSONDict``.
	"""
	stampedName = util.string_appendDateAndTime("aliasData")
	targetPath = os.path.join(c.PATH_TO_RESOURCES, "aliasdata", stampedName)
	util.dumpJSONDict(targetPath, aliasData)