# Example #1
# 0
def test_best_value():
    """best_value should return the (K, L) pair with the best score per metric.

    BIC and AIC are information criteria (lower is better).
    NOTE(review): this test also expects the *minimum* loglikelihood entry
    ((2, 2) with value 8.), although a higher loglikelihood normally means a
    better fit -- confirm against the GreedySearch implementation.
    """
    I, J = 10, 9
    values_K = [1, 2, 4, 5]
    values_L = [5, 4, 3]
    R = 2 * numpy.ones((I, J))
    M = numpy.ones((I, J))
    priors = {'alpha': 3, 'beta': 4, 'lambdaF': 5, 'lambdaS': 6, 'lambdaG': 7}
    initFG = 'exp'
    initS = 'random'
    iterations = 11

    greedysearch = GreedySearch(classifier, values_K, values_L, R, M, priors,
                                initS, initFG, iterations)
    greedysearch.all_performances = {
        'BIC': [(1, 2, 10.), (2, 2, 20.), (2, 3, 30.), (2, 4, 5.),
                (5, 3, 20.)],
        'AIC': [(1, 2, 10.), (2, 2, 20.), (2, 3, 4.), (2, 4, 25.),
                (5, 3, 20.)],
        'loglikelihood': [(1, 2, 10.), (2, 2, 8.), (2, 3, 30.), (2, 4, 40.),
                          (5, 3, 20.)]
    }
    assert greedysearch.best_value('BIC') == (2, 4)
    assert greedysearch.best_value('AIC') == (2, 3)
    assert greedysearch.best_value('loglikelihood') == (2, 2)
    # BUGFIX: this test is about best_value, so exercise *its* error path;
    # the original called all_values('FAIL') (copy-pasted from a
    # test_all_values). best_value presumably delegates metric validation
    # to all_values, so the same AssertionError is expected -- TODO confirm.
    with pytest.raises(AssertionError) as error:
        greedysearch.best_value('FAIL')
    assert str(error.value) == "Unrecognised metric name: FAIL."
def test_best_value():
    """best_value returns the (K, L) pair with the best score per metric,
    and all_values raises AssertionError for an unknown metric name."""
    n_rows, n_cols = 10, 9
    values_K = [1, 2, 4, 5]
    values_L = [5, 4, 3]
    R = 2 * numpy.ones((n_rows, n_cols))
    M = numpy.ones((n_rows, n_cols))
    priors = dict(alpha=3, beta=4, lambdaF=5, lambdaS=6, lambdaG=7)
    initFG = 'exp'
    initS = 'random'
    iterations = 11

    greedysearch = GreedySearch(classifier, values_K, values_L, R, M, priors,
                                initS, initFG, iterations)
    greedysearch.all_performances = {
        'BIC': [(1, 2, 10.), (2, 2, 20.), (2, 3, 30.), (2, 4, 5.), (5, 3, 20.)],
        'AIC': [(1, 2, 10.), (2, 2, 20.), (2, 3, 4.), (2, 4, 25.), (5, 3, 20.)],
        'loglikelihood': [(1, 2, 10.), (2, 2, 8.), (2, 3, 30.), (2, 4, 40.), (5, 3, 20.)],
    }

    # Best (K, L) expected for each stored metric.
    expected_best = {'BIC': (2, 4), 'AIC': (2, 3), 'loglikelihood': (2, 2)}
    for metric, best_KL in expected_best.items():
        assert greedysearch.best_value(metric) == best_KL

    # An unrecognised metric name is rejected with an AssertionError.
    with pytest.raises(AssertionError) as error:
        greedysearch.all_values('FAIL')
    assert str(error.value) == "Unrecognised metric name: FAIL."
    def run(self, burn_in=None, thinning=None, minimum_TN=None):
        folds_test = mask.compute_folds(self.I, self.J, self.folds, self.M)
        folds_training = mask.compute_Ms(folds_test)

        for i, (train, test) in enumerate(zip(folds_training, folds_test)):
            print "Fold %s." % (i + 1)

            # Run the greedy grid search
            greedy_search = GreedySearch(classifier=self.classifier,
                                         values_K=self.values_K,
                                         values_L=self.values_L,
                                         R=self.R,
                                         M=self.M,
                                         priors=self.priors,
                                         initS=self.init_S,
                                         initFG=self.init_FG,
                                         iterations=self.iterations,
                                         restarts=self.restarts)
            greedy_search.search(self.quality_metric,
                                 burn_in=burn_in,
                                 thinning=thinning,
                                 minimum_TN=minimum_TN)

            # Store the model fits, and find the best one according to the metric
            all_performances = greedy_search.all_values(
                metric=self.quality_metric)
            self.fout.write("All model fits for fold %s, metric %s: %s.\n" %
                            (i + 1, self.quality_metric, all_performances))
            self.fout.flush()

            best_KL = greedy_search.best_value(metric=self.quality_metric)
            self.fout.write("Best K,L for fold %s: %s.\n" % (i + 1, best_KL))

            # Train a model with this K and measure performance on the test set
            performance = self.run_model(train,
                                         test,
                                         best_KL[0],
                                         best_KL[1],
                                         burn_in=burn_in,
                                         thinning=thinning,
                                         minimum_TN=minimum_TN)
            self.fout.write("Performance: %s.\n\n" % performance)
            self.fout.flush()
    def run(self, burn_in=None, thinning=None, minimum_TN=None):
        folds_test = mask.compute_folds(self.I, self.J, self.folds, self.M)
        folds_training = mask.compute_Ms(folds_test)

        for i, (train, test) in enumerate(zip(folds_training, folds_test)):
            print "Fold %s." % (i + 1)

            # Run the greedy grid search
            greedy_search = GreedySearch(
                classifier=self.classifier,
                values_K=self.values_K,
                values_L=self.values_L,
                R=self.R,
                M=self.M,
                priors=self.priors,
                initS=self.init_S,
                initFG=self.init_FG,
                iterations=self.iterations,
                restarts=self.restarts,
            )
            greedy_search.search(self.quality_metric, burn_in=burn_in, thinning=thinning, minimum_TN=minimum_TN)

            # Store the model fits, and find the best one according to the metric
            all_performances = greedy_search.all_values(metric=self.quality_metric)
            self.fout.write(
                "All model fits for fold %s, metric %s: %s.\n" % (i + 1, self.quality_metric, all_performances)
            )
            self.fout.flush()

            best_KL = greedy_search.best_value(metric=self.quality_metric)
            self.fout.write("Best K,L for fold %s: %s.\n" % (i + 1, best_KL))

            # Train a model with this K and measure performance on the test set
            performance = self.run_model(
                train, test, best_KL[0], best_KL[1], burn_in=burn_in, thinning=thinning, minimum_TN=minimum_TN
            )
            self.fout.write("Performance: %s.\n\n" % performance)
            self.fout.flush()
        values_i,
        cmap="jet_r",
        vmin=min(values),
        vmax=max(values),
        origin="lower",
        extent=[min(list_values_K) - 1, max(list_values_K) + 1, min(list_values_L) - 1, max(list_values_L) + 1],
    )
    plt.scatter(list_values_K, list_values_L, c=values, cmap="jet_r")
    plt.colorbar()
    plt.title("Metric: %s." % metric)
    plt.xlabel("K")
    plt.ylabel("L")
    plt.show()

    # Print the best value
    best_K, best_L = greedy_search.best_value(metric)
    print "Best K,L for metric %s: %s,%s." % (metric, best_K, best_L)


# Also print out all values in a dictionary
all_values = {}
for metric in metrics:
    (_, _, values) = zip(*numpy.array(greedy_search.all_values(metric)))
    all_values[metric] = list(values)

print "all_values = %s \nlist_values_K=%s \nlist_values_L=%s" % (all_values, list(list_values_K), list(list_values_L))


"""
all_values = {'MSE': [3.0272042551947203, 3.027204256305112, 3.0272042923576148, 2.5914654932112464, 2.5918836849320201, 2.5914602381010914, 2.3493739958858635, 2.3511225674996381, 2.3584324978814539, 2.1868222893761833, 2.1911559705091568, 2.2016668628098452, 2.0510257720785683, 2.0546897432717603, 2.0586496735360251, 2.0826309185454925], 'loglikelihood': [-138379.73430838491, -138379.74014614287, -138380.57362950334, -132935.31284836732, -132949.35927074254, -132936.87960196467, -129506.33023969264, -129543.6258747291, -129641.90589727147, -127005.00858615834, -127072.80234078577, -127243.67779180428, -124768.25065830135, -124835.58530247367, -124903.988439383, -125310.14633691005], 'AIC': [278283.46861676982, 279529.48029228573, 278565.14725900668, 268922.62569673464, 270198.71854148508, 269207.75920392934, 263596.66047938529, 264921.2517494582, 264151.81179454294, 260130.01717231667, 261517.60468157154, 260893.35558360856, 257196.50131660269, 258585.17060494734, 257755.976878766, 259824.29267382011], 'BIC': [285262.09744653094, 292213.73348023742, 286825.93886588927, 282898.19996735867, 289889.07547585049, 284474.6545572257, 284587.4968019739, 291636.02904133906, 286443.12750535476, 288154.43215797161, 295275.11894206965, 290227.40826303756, 292272.81157642574, 299403.7384451644, 294151.08313791396, 301970.81481891294]} 
list_values_K=[1.0, 2.0, 1.0, 2.0, 3.0, 2.0, 3.0, 4.0, 3.0, 4.0, 5.0, 4.0, 5.0, 6.0, 5.0, 6.0] 
               origin='lower',
               extent=[
                   min(list_values_K) - 1,
                   max(list_values_K) + 1,
                   min(list_values_L) - 1,
                   max(list_values_L) + 1
               ])
    plt.scatter(list_values_K, list_values_L, c=values, cmap='jet_r')
    plt.colorbar()
    plt.title("Metric: %s." % metric)
    plt.xlabel("K")
    plt.ylabel("L")
    plt.show()

    # Print the best value
    best_K, best_L = greedy_search.best_value(metric)
    print "Best K,L for metric %s: %s,%s." % (metric, best_K, best_L)

# Also print out all values in a dictionary
all_values = {}
for metric in metrics:
    (_, _, values) = zip(*numpy.array(greedy_search.all_values(metric)))
    all_values[metric] = list(values)

print "all_values = %s \nlist_values_K=%s \nlist_values_L=%s" % \
    (all_values,list(list_values_K),list(list_values_L))
'''
all_values = {'MSE': [3.0272042551947203, 3.027204256305112, 3.0272042923576148, 2.5914654932112464, 2.5918836849320201, 2.5914602381010914, 2.3493739958858635, 2.3511225674996381, 2.3584324978814539, 2.1868222893761833, 2.1911559705091568, 2.2016668628098452, 2.0510257720785683, 2.0546897432717603, 2.0586496735360251, 2.0826309185454925], 'loglikelihood': [-138379.73430838491, -138379.74014614287, -138380.57362950334, -132935.31284836732, -132949.35927074254, -132936.87960196467, -129506.33023969264, -129543.6258747291, -129641.90589727147, -127005.00858615834, -127072.80234078577, -127243.67779180428, -124768.25065830135, -124835.58530247367, -124903.988439383, -125310.14633691005], 'AIC': [278283.46861676982, 279529.48029228573, 278565.14725900668, 268922.62569673464, 270198.71854148508, 269207.75920392934, 263596.66047938529, 264921.2517494582, 264151.81179454294, 260130.01717231667, 261517.60468157154, 260893.35558360856, 257196.50131660269, 258585.17060494734, 257755.976878766, 259824.29267382011], 'BIC': [285262.09744653094, 292213.73348023742, 286825.93886588927, 282898.19996735867, 289889.07547585049, 284474.6545572257, 284587.4968019739, 291636.02904133906, 286443.12750535476, 288154.43215797161, 295275.11894206965, 290227.40826303756, 292272.81157642574, 299403.7384451644, 294151.08313791396, 301970.81481891294]} 
list_values_K=[1.0, 2.0, 1.0, 2.0, 3.0, 2.0, 3.0, 4.0, 3.0, 4.0, 5.0, 4.0, 5.0, 6.0, 5.0, 6.0] 
list_values_L=[1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 6.0, 6.0]
'''