def test_best_value():
    """best_value(metric) returns the (K,L) pair with the best score for
    that metric, and raises AssertionError for an unrecognised metric name.

    The search itself is not run: fake per-(K,L,score) performance tuples
    are injected directly into all_performances.
    """
    I, J = 10, 9
    values_K = [1, 2, 4, 5]
    values_L = [5, 4, 3]
    R = 2 * numpy.ones((I, J))
    M = numpy.ones((I, J))
    priors = {'alpha': 3, 'beta': 4, 'lambdaF': 5, 'lambdaS': 6, 'lambdaG': 7}
    initFG = 'exp'
    initS = 'random'
    iterations = 11
    greedysearch = GreedySearch(classifier, values_K, values_L, R, M, priors,
                                initS, initFG, iterations)
    # Inject hand-crafted (K, L, score) tuples so best_value can be tested
    # without running the (expensive) greedy search.
    greedysearch.all_performances = {
        'BIC': [(1, 2, 10.), (2, 2, 20.), (2, 3, 30.), (2, 4, 5.), (5, 3, 20.)],
        'AIC': [(1, 2, 10.), (2, 2, 20.), (2, 3, 4.), (2, 4, 25.), (5, 3, 20.)],
        'loglikelihood': [(1, 2, 10.), (2, 2, 8.), (2, 3, 30.), (2, 4, 40.), (5, 3, 20.)]
    }
    assert greedysearch.best_value('BIC') == (2, 4)
    assert greedysearch.best_value('AIC') == (2, 3)
    assert greedysearch.best_value('loglikelihood') == (2, 2)
    # FIX: the original called all_values('FAIL') here — a copy-paste from
    # test_all_values. This test should exercise best_value's error path.
    with pytest.raises(AssertionError) as error:
        greedysearch.best_value('FAIL')
    assert str(error.value) == "Unrecognised metric name: FAIL."
def test_best_value():
    """Exercise best_value() against hand-crafted performance tables."""
    rows, cols = 10, 9
    values_K = [1, 2, 4, 5]
    values_L = [5, 4, 3]
    R = 2 * numpy.ones((rows, cols))
    M = numpy.ones((rows, cols))
    priors = {'alpha': 3, 'beta': 4, 'lambdaF': 5, 'lambdaS': 6, 'lambdaG': 7}
    gs = GreedySearch(classifier, values_K, values_L, R, M, priors,
                      'random', 'exp', 11)
    # Bypass the actual search: install fake (K, L, score) tuples directly.
    gs.all_performances = {
        'BIC': [(1, 2, 10.), (2, 2, 20.), (2, 3, 30.), (2, 4, 5.), (5, 3, 20.)],
        'AIC': [(1, 2, 10.), (2, 2, 20.), (2, 3, 4.), (2, 4, 25.), (5, 3, 20.)],
        'loglikelihood': [(1, 2, 10.), (2, 2, 8.), (2, 3, 30.), (2, 4, 40.), (5, 3, 20.)]
    }
    assert gs.best_value('BIC') == (2, 4)
    assert gs.best_value('AIC') == (2, 3)
    assert gs.best_value('loglikelihood') == (2, 2)
    # NOTE(review): this calls all_values, not best_value — looks like a
    # copy-paste from a test_all_values sibling; confirm which method the
    # error path should target.
    with pytest.raises(AssertionError) as error:
        gs.all_values('FAIL')
    assert str(error.value) == "Unrecognised metric name: FAIL."
def run(self, burn_in=None, thinning=None, minimum_TN=None):
    """Cross-validated model selection.

    For each fold: run a greedy grid search over (K, L) using the quality
    metric, log all fits, pick the best (K, L), retrain on the fold's
    training mask, and record test-set performance to self.fout.
    """
    folds_test = mask.compute_folds(self.I, self.J, self.folds, self.M)
    folds_training = mask.compute_Ms(folds_test)
    for fold, (train, test) in enumerate(zip(folds_training, folds_test)):
        print("Fold %s." % (fold + 1))

        # Greedy grid search over the candidate (K, L) values.
        searcher = GreedySearch(classifier=self.classifier,
                                values_K=self.values_K,
                                values_L=self.values_L,
                                R=self.R,
                                M=self.M,
                                priors=self.priors,
                                initS=self.init_S,
                                initFG=self.init_FG,
                                iterations=self.iterations,
                                restarts=self.restarts)
        searcher.search(self.quality_metric, burn_in=burn_in,
                        thinning=thinning, minimum_TN=minimum_TN)

        # Log every fit, then select the best (K, L) by the chosen metric.
        all_performances = searcher.all_values(metric=self.quality_metric)
        self.fout.write("All model fits for fold %s, metric %s: %s.\n" % (
            fold + 1, self.quality_metric, all_performances))
        self.fout.flush()
        best_KL = searcher.best_value(metric=self.quality_metric)
        self.fout.write("Best K,L for fold %s: %s.\n" % (fold + 1, best_KL))

        # Retrain at the selected K, L and score on this fold's test set.
        performance = self.run_model(train, test, best_KL[0], best_KL[1],
                                     burn_in=burn_in, thinning=thinning,
                                     minimum_TN=minimum_TN)
        self.fout.write("Performance: %s.\n\n" % performance)
        self.fout.flush()
def run(self, burn_in=None, thinning=None, minimum_TN=None):
    """Per-fold greedy (K, L) selection followed by held-out evaluation.

    Each fold's search results, chosen (K, L), and test performance are
    written (and flushed) to self.fout.
    """
    folds_test = mask.compute_folds(self.I, self.J, self.folds, self.M)
    folds_training = mask.compute_Ms(folds_test)

    fold_number = 0
    for train, test in zip(folds_training, folds_test):
        fold_number += 1
        print("Fold %s." % fold_number)

        # Assemble the search configuration once, then run the greedy
        # grid search over candidate (K, L) pairs.
        search_config = dict(classifier=self.classifier,
                             values_K=self.values_K,
                             values_L=self.values_L,
                             R=self.R,
                             M=self.M,
                             priors=self.priors,
                             initS=self.init_S,
                             initFG=self.init_FG,
                             iterations=self.iterations,
                             restarts=self.restarts)
        greedy_search = GreedySearch(**search_config)
        greedy_search.search(self.quality_metric, burn_in=burn_in,
                             thinning=thinning, minimum_TN=minimum_TN)

        # Record every model fit and extract the metric's best (K, L).
        all_performances = greedy_search.all_values(metric=self.quality_metric)
        self.fout.write("All model fits for fold %s, metric %s: %s.\n" % (
            fold_number, self.quality_metric, all_performances))
        self.fout.flush()
        best_KL = greedy_search.best_value(metric=self.quality_metric)
        self.fout.write("Best K,L for fold %s: %s.\n" % (fold_number, best_KL))

        # Fit a fresh model at that (K, L) and evaluate on the test mask.
        performance = self.run_model(train, test, best_KL[0], best_KL[1],
                                     burn_in=burn_in, thinning=thinning,
                                     minimum_TN=minimum_TN)
        self.fout.write("Performance: %s.\n\n" % performance)
        self.fout.flush()
values_i, cmap="jet_r", vmin=min(values), vmax=max(values), origin="lower", extent=[min(list_values_K) - 1, max(list_values_K) + 1, min(list_values_L) - 1, max(list_values_L) + 1], ) plt.scatter(list_values_K, list_values_L, c=values, cmap="jet_r") plt.colorbar() plt.title("Metric: %s." % metric) plt.xlabel("K") plt.ylabel("L") plt.show() # Print the best value best_K, best_L = greedy_search.best_value(metric) print "Best K,L for metric %s: %s,%s." % (metric, best_K, best_L) # Also print out all values in a dictionary all_values = {} for metric in metrics: (_, _, values) = zip(*numpy.array(greedy_search.all_values(metric))) all_values[metric] = list(values) print "all_values = %s \nlist_values_K=%s \nlist_values_L=%s" % (all_values, list(list_values_K), list(list_values_L)) """ all_values = {'MSE': [3.0272042551947203, 3.027204256305112, 3.0272042923576148, 2.5914654932112464, 2.5918836849320201, 2.5914602381010914, 2.3493739958858635, 2.3511225674996381, 2.3584324978814539, 2.1868222893761833, 2.1911559705091568, 2.2016668628098452, 2.0510257720785683, 2.0546897432717603, 2.0586496735360251, 2.0826309185454925], 'loglikelihood': [-138379.73430838491, -138379.74014614287, -138380.57362950334, -132935.31284836732, -132949.35927074254, -132936.87960196467, -129506.33023969264, -129543.6258747291, -129641.90589727147, -127005.00858615834, -127072.80234078577, -127243.67779180428, -124768.25065830135, -124835.58530247367, -124903.988439383, -125310.14633691005], 'AIC': [278283.46861676982, 279529.48029228573, 278565.14725900668, 268922.62569673464, 270198.71854148508, 269207.75920392934, 263596.66047938529, 264921.2517494582, 264151.81179454294, 260130.01717231667, 261517.60468157154, 260893.35558360856, 257196.50131660269, 258585.17060494734, 257755.976878766, 259824.29267382011], 'BIC': [285262.09744653094, 292213.73348023742, 286825.93886588927, 282898.19996735867, 289889.07547585049, 284474.6545572257, 284587.4968019739, 291636.02904133906, 286443.12750535476, 
288154.43215797161, 295275.11894206965, 290227.40826303756, 292272.81157642574, 299403.7384451644, 294151.08313791396, 301970.81481891294]} list_values_K=[1.0, 2.0, 1.0, 2.0, 3.0, 2.0, 3.0, 4.0, 3.0, 4.0, 5.0, 4.0, 5.0, 6.0, 5.0, 6.0]
origin='lower', extent=[ min(list_values_K) - 1, max(list_values_K) + 1, min(list_values_L) - 1, max(list_values_L) + 1 ]) plt.scatter(list_values_K, list_values_L, c=values, cmap='jet_r') plt.colorbar() plt.title("Metric: %s." % metric) plt.xlabel("K") plt.ylabel("L") plt.show() # Print the best value best_K, best_L = greedy_search.best_value(metric) print "Best K,L for metric %s: %s,%s." % (metric, best_K, best_L) # Also print out all values in a dictionary all_values = {} for metric in metrics: (_, _, values) = zip(*numpy.array(greedy_search.all_values(metric))) all_values[metric] = list(values) print "all_values = %s \nlist_values_K=%s \nlist_values_L=%s" % \ (all_values,list(list_values_K),list(list_values_L)) ''' all_values = {'MSE': [3.0272042551947203, 3.027204256305112, 3.0272042923576148, 2.5914654932112464, 2.5918836849320201, 2.5914602381010914, 2.3493739958858635, 2.3511225674996381, 2.3584324978814539, 2.1868222893761833, 2.1911559705091568, 2.2016668628098452, 2.0510257720785683, 2.0546897432717603, 2.0586496735360251, 2.0826309185454925], 'loglikelihood': [-138379.73430838491, -138379.74014614287, -138380.57362950334, -132935.31284836732, -132949.35927074254, -132936.87960196467, -129506.33023969264, -129543.6258747291, -129641.90589727147, -127005.00858615834, -127072.80234078577, -127243.67779180428, -124768.25065830135, -124835.58530247367, -124903.988439383, -125310.14633691005], 'AIC': [278283.46861676982, 279529.48029228573, 278565.14725900668, 268922.62569673464, 270198.71854148508, 269207.75920392934, 263596.66047938529, 264921.2517494582, 264151.81179454294, 260130.01717231667, 261517.60468157154, 260893.35558360856, 257196.50131660269, 258585.17060494734, 257755.976878766, 259824.29267382011], 'BIC': [285262.09744653094, 292213.73348023742, 286825.93886588927, 282898.19996735867, 289889.07547585049, 284474.6545572257, 284587.4968019739, 291636.02904133906, 286443.12750535476, 288154.43215797161, 295275.11894206965, 290227.40826303756, 
292272.81157642574, 299403.7384451644, 294151.08313791396, 301970.81481891294]} list_values_K=[1.0, 2.0, 1.0, 2.0, 3.0, 2.0, 3.0, 4.0, 3.0, 4.0, 5.0, 4.0, 5.0, 6.0, 5.0, 6.0] list_values_L=[1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 6.0, 6.0] '''