def loop_all_combinations_for(dep, correlations, dataset):
    """Sweep the PCA depth for ``dep`` and report the best models found.

    ``regression`` is called once per PCA depth, always with the complete
    ``correlations`` collection as predictor set.  Two results are tracked
    and finally printed through ``util.display_result``:

    * the result with the largest value at index 1 (the original comments
      call this "rsquared"), and
    * the result with the smallest value at index 4 (called "fitness" in
      the original comments).

    Args:
        dep: Dependent variable; forwarded to ``regression`` and used as a
            string prefix for the report labels.
        correlations: Sized collection of candidate predictors; forwarded
            unchanged to ``regression``.
        dataset: Data forwarded unchanged to ``regression``.

    Returns:
        None.  The outcome is only displayed.
    """
    # Placeholder results: index 1 starts at 0 and index 4 at 9999 so that
    # any result improving on those values replaces the placeholder.
    top_rsquared = ('', 0, 0, 9999, 9999, '', [0], [0], 0, 0, 0)
    top_fitness = ('', 0, 0, 9999, 9999, '', [0], [0], 0, (0, 0), 0)

    first_depth = configuration.pca_min_n
    # NOTE(review): the depth stops at len(correlations) - 1, so the full
    # depth is never tried - confirm that this is intended.
    depth_stop = min(configuration.pca_max_n + 1, len(correlations))

    for depth in range(first_depth, depth_stop):
        print("    generating pca depth ", depth)
        candidate = regression(dep, correlations, dataset, depth)

        # keep the candidate with the larger index-1 value
        if candidate[1] > top_rsquared[1]:
            top_rsquared = candidate

        # keep the candidate with the smaller index-4 value
        if candidate[4] < top_fitness[4]:
            top_fitness = candidate

    util.display_result(dep + " highest squared", top_rsquared)
    util.display_result(dep + " best fit", top_fitness)
예제 #2
0
def loop_all_combinations_for(dep, correlations, dataset):
    """Evaluate ``nb_model`` on every predictor subset and PCA depth.

    Subsets of ``correlations[0]`` with at least two members are generated
    up to a size of ``configuration.max_dept_logreg`` (or the size of the
    pool, whichever is smaller).  For each subset every PCA depth from
    ``configuration.pca_min_n`` up to ``configuration.pca_max_n`` is tried,
    capped at the subset size.

    The result with the largest index-1 value ("rsquared" in the original
    comments) and the result with the largest ``[9][0]`` value ("fitness"
    in the original comments) are printed through ``util.display_result``.

    Args:
        dep: Dependent variable; forwarded to ``nb_model`` and used as a
            string prefix for the report labels.
        correlations: Indexable whose first element is the pool of
            candidate predictors.
        dataset: Data forwarded unchanged to ``nb_model``.

    Returns:
        None.  The outcome is only displayed.
    """
    # Placeholder results; only top_score needs a subscriptable entry at
    # index 9 because only it is compared through [9][0].
    top_score = ('', 0, 0, 9999, 9999, '', [0], [0], 0, (0, 0), 0)
    top_rsquared = ('', 0, 0, 9999, 9999, '', [0], [0], 0, 0, 0)

    pool = correlations[0]
    size_stop = min(len(pool) + 1, configuration.max_dept_logreg + 1)

    for size in range(2, size_stop):
        print("    generating length ", size)
        for chosen in itertools.combinations(pool, size):
            # the PCA depth cannot exceed the number of chosen predictors
            depth_stop = min(configuration.pca_max_n + 1, len(chosen) + 1)
            for depth in range(configuration.pca_min_n, depth_stop):
                candidate = nb_model(dep, chosen, dataset, depth)

                # keep the candidate with the larger index-1 value
                if candidate[1] > top_rsquared[1]:
                    top_rsquared = candidate

                # keep the candidate with the larger [9][0] value
                if candidate[9][0] > top_score[9][0]:
                    top_score = candidate

    util.display_result(dep + " highest squared", top_rsquared)
    util.display_result(dep + " best fit", top_score)
예제 #3
0
def loop_all_combinations_for(dep, correlations, dataset):
    """Search all predictor subsets for the best regression models of ``dep``.

    Steps:

    1. Fit a baseline model on ``configuration.independent``; it seeds all
       three "best so far" slots and is displayed.
    2. Fit a reference model on ``configuration.independent_major``; it is
       displayed only and does not take part in the comparison.
    3. Fit a model for every subset of ``correlations[0]`` with 1 up to
       ``configuration.max_depth`` members (capped at the pool size) and
       keep the results with the largest index-1 value ("rsquared"), the
       largest index-2 value (tracked as ``highest_adj``) and the largest
       ``[9][0]`` value ("best fit").

    The three winners are printed through ``util.display_result``.

    Args:
        dep: Dependent variable; forwarded to ``regression`` and used as a
            string prefix for the report labels.
        correlations: Indexable whose first element is the pool of
            candidate predictors.
        dataset: Data forwarded unchanged to ``regression``.

    Returns:
        None.  The outcome is only displayed.
    """
    # Baseline model with all independents.  It initialises every tracker,
    # so no placeholder tuples are needed (the former placeholders were
    # overwritten here before ever being read).
    new = regression(dep, configuration.independent, dataset)
    highest = new
    highest_adj = new
    best_fit = new
    util.display_result("model with all independents", new)

    # Reference model with only the major independents; displayed for
    # comparison but never considered as a candidate.
    new = regression(dep, configuration.independent_major, dataset)
    util.display_result("model with only major independents", new)

    print("creating all combinations for ", dep, "of :")
    print(correlations[0])

    # Generate every combination of the dimensions that potentially
    # correlate with <dep>.
    for length in range(1, min(len(correlations[0]) + 1, configuration.max_depth + 1)):
        print("    generating length ", length)
        for subset in itertools.combinations(correlations[0], length):
            # new holds the model metrics as a tuple
            new = regression(dep, subset, dataset)

            if new[1] > highest[1]:
                highest = new

            if new[2] > highest_adj[2]:
                highest_adj = new

            if new[9][0] > best_fit[9][0]:
                best_fit = new

    util.display_result(dep + " highest squared", highest)
    util.display_result(dep + " highest squared_adj", highest_adj)
    util.display_result(dep + " best fit", best_fit)
예제 #4
0
def loop_all_combinations_for(dep, correlations, dataset):
    """Evaluate ``regression`` on every predictor subset and degree.

    Subsets of ``correlations[0]`` with at least two members are generated
    up to a size of ``configuration.max_dept_poly`` (or the size of the
    pool, whichever is smaller).  Each subset is fitted once per degree in
    ``configuration.pol_min_degree`` .. ``configuration.pol_max_degree``
    (inclusive).

    The result with the largest index-1 value ("rsquared" in the original
    comments) and the result with the largest ``[9][0]`` value ("fitness"
    in the original comments) are printed through ``util.display_result``.

    Args:
        dep: Dependent variable; forwarded to ``regression`` and used as a
            string prefix for the report labels.
        correlations: Indexable whose first element is the pool of
            candidate predictors.
        dataset: Data forwarded unchanged to ``regression``.

    Returns:
        None.  The outcome is only displayed.
    """
    # Placeholder results that any real result with positive scores beats.
    top_rsquared = ('', 0, 0, 9999, 9999, '', [0], [0], 0, (0, 0), 0)
    top_score = ('', 0, 0, 9999, 9999, '', [0], [0], 0, (0, 0), 0)

    pool = correlations[0]
    size_stop = min(len(pool) + 1, configuration.max_dept_poly + 1)

    for size in range(2, size_stop):
        print("    generating length ", size)
        for chosen in itertools.combinations(pool, size):
            degrees = range(configuration.pol_min_degree,
                            configuration.pol_max_degree + 1)
            for power in degrees:
                candidate = regression(dep, chosen, dataset, power)

                # keep the candidate with the larger index-1 value
                if candidate[1] > top_rsquared[1]:
                    top_rsquared = candidate

                # keep the candidate with the larger [9][0] value
                if candidate[9][0] > top_score[9][0]:
                    top_score = candidate

    util.display_result(dep + " highest squared", top_rsquared)
    util.display_result(dep + " best fit", top_score)
예제 #5
0
def loop_all_combinations_for(dep, correlations, dataset):
    """Fit ``regression`` on every combination of plain and power terms.

    First a term pool is built from ``correlations[0]``: every entry is
    added as-is, and every entry for which ``util.hasNumbers`` is false is
    additionally added as ``"<entry> ** <degree>"`` for each degree in
    ``configuration.pol_min_degree`` .. ``configuration.pol_max_degree``
    (inclusive).  Then every combination of 1 up to
    ``configuration.max_dept_poly`` terms (capped at the pool size) is
    fitted.

    Each result is displayed; whenever a result has a smaller index-4
    value than the best one so far, the subset and the result are printed
    again.  The overall best result is displayed at the end.

    Args:
        dep: Dependent variable; forwarded to ``regression`` and used as
            the label (and label prefix) of the displayed results.
        correlations: Indexable whose first element is an iterable of
            predictor names (strings, since they are concatenated).
        dataset: Data forwarded unchanged to ``regression``.

    Returns:
        None.  The outcome is only displayed.
    """
    plain_terms = []
    term_pool = []
    # Placeholder result: index 4 starts at 9999 so that any smaller value
    # replaces it.
    best = ('', 0, 0, 9999, 9999, '', [0], [0], 0)

    # Step 1: collect the plain terms and their power variants.
    for name in correlations[0]:
        term_pool.append(name)
        plain_terms.append(name)
        if util.hasNumbers(name):
            # no power terms for names for which hasNumbers() is true
            continue
        term_pool.extend(
            name + " ** " + str(power)
            for power in range(configuration.pol_min_degree,
                               configuration.pol_max_degree + 1)
        )

    # Step 2: fit every combination of the collected terms.
    size_stop = min(len(term_pool) + 1, configuration.max_dept_poly + 1)
    for size in range(1, size_stop):
        print("    generating length ", size)
        for chosen in itertools.combinations(term_pool, size):
            candidate = regression(dep, chosen, dataset, plain_terms)
            util.display_result(dep, candidate)

            if not (candidate[4] < best[4]):
                continue

            # new best: remember it and report the winning subset
            best = candidate
            print("subset = ", chosen)
            util.display_result(dep, candidate)

    util.display_result(dep + " best fit", best)