def format_target_indices_for_regression_conditioning(data,
                                                      unreduced_fitness_values,
                                                      fitness_funcs,
                                                      fitness_dim):
    #create fitness targets
    fitness_targets = _format_fitness_targets_regression(
        fitness_funcs, unreduced_fitness_values, fitness_dim)

    #create fitness indices
    fitness_size = len(list(ut.flatten(fitness_targets)))
    data_size = len(list(ut.flatten(data[0])))
    indices = np.arange(data_size, data_size + fitness_size)
    return indices, fitness_targets
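
#Why the indices start at data_size: the regression targets occupy the
#columns appended after the flattened data row. A toy illustration of that
#indexing convention (plain numpy, independent of the project's helpers):
import numpy as np

data_row = [0.3, 1.2, 0.8]      #one flattened training row (D = 3)
fitness_targets = [1.0, 1.0]    #two regression targets (F = 2)

#columns D..D+F-1 of the stacked training matrix hold the fitness targets
indices = np.arange(len(data_row), len(data_row) + len(fitness_targets))
stacked = np.column_stack(([data_row], [fitness_targets]))
print(indices)              # [3 4]
print(stacked[0, indices])  # [1. 1.]
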
def format_data_for_training(parent, parent_var_names, siblings,
                             sibling_var_names):
    data = list(
        ut.flatten([parent.values["ind"][name] for name in parent_var_names] +
                   [[child.values["ind"][name] for name in sibling_var_names]
                    for child in siblings]))
    return data
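
#A minimal run of the same flattening with stand-in objects; the Node class
#and the local flatten() are assumptions standing in for the project's
#sample objects and ut.flatten:
class Node:
    def __init__(self, ind):
        self.values = {"ind": ind}

def flatten(nested):
    #recursively yield the leaves of a nested list
    for item in nested:
        if isinstance(item, list):
            yield from flatten(item)
        else:
            yield item

parent = Node({"shape": [4.0, 2.0]})
siblings = [Node({"position": [0.1, 0.2]}), Node({"position": [0.7, 0.9]})]

row = list(flatten([parent.values["ind"][n] for n in ["shape"]] +
                   [[c.values["ind"][n] for n in ["position"]]
                    for c in siblings]))
print(row)  # [4.0, 2.0, 0.1, 0.2, 0.7, 0.9]
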
def format_data_for_conditional(parent_sample, parent_vars, sibling_samples,
                                sibling_vars, sibling_order):
    #flatten the lists for calculation of the conditional distribution
    #parent condition variable
    parent_values = list(
        ut.flatten(
            [parent_sample.values["ind"][var.name] for var in parent_vars]))
    #sibling condition variable
    #only retrieve values of the necessary siblings; for sibling order i, take the last i siblings
    sibling_values = list(
        ut.flatten([
            sibling.values["ind"][var.name] for var in sibling_vars
            for sibling in sibling_samples[-sibling_order:]
        ]))
    #the order of values is by convention, also enforced in the sampling and learning procedure
    values = np.concatenate((parent_values, sibling_values))
    #the first len(values) entries are the conditioning attributes
    indices = np.arange(0, len(values))
    return indices, values
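
#The convention made concrete: parent values first, then the last
#sibling_order siblings, and the conditioning indices cover exactly that
#prefix. All names below are invented stand-ins for illustration:
import numpy as np

class Var:
    def __init__(self, name):
        self.name = name

class Sample:
    def __init__(self, ind):
        self.values = {"ind": ind}

parent = Sample({"shape": [4.0, 2.0]})
sibs = [Sample({"pos": [float(i), i + 0.5]}) for i in range(3)]
parent_vars, sibling_vars, sibling_order = [Var("shape")], [Var("pos")], 2

parent_values = [v for var in parent_vars
                 for v in parent.values["ind"][var.name]]
sibling_values = [v for var in sibling_vars
                  for s in sibs[-sibling_order:]
                  for v in s.values["ind"][var.name]]
values = np.concatenate((parent_values, sibling_values))
print(values)                  # [4.  2.  1.  1.5 2.  2.5]
print(np.arange(len(values)))  # conditioning indices: [0 1 2 3 4 5]
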
def visualise(root_layout_sample, color_list, child_samples=None, ax=None):
    import model.mapping as mp
    if child_samples is None:
        samples = root_layout_sample.get_flat_list()
    else:
        samples = [root_layout_sample] + child_samples
    #group the polygons of the samples by name
    polygon_dict = dict([(sample.name, []) for sample in samples])
    for sample in samples:
        polygon_dict[sample.name].append(
            mp.map_layoutsample_to_geometricobject(sample, "shape"))
    for name, color in color_list:
        vis.draw_polygons(polygons=polygon_dict[name], ax=ax, color=color)
    xrange, yrange = ut.range_from_polygons(
        list(ut.flatten(polygon_dict.values())))
    ax = vis.get_ax(ax)
    ax.set_xlim(*xrange)
    ax.set_ylim(*yrange)
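
#A self-contained matplotlib sketch of the same idea: group polygons by
#name, draw each group in its colour, then fit the axis limits. This
#bypasses the project's vis/mp helpers entirely; the toy polygons are
#invented for illustration:
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon

groups = {"parent": [[(0, 0), (4, 0), (4, 3), (0, 3)]],
          "child": [[(1, 1), (2, 1), (2, 2)], [(2.5, 1), (3.5, 1), (3, 2)]]}
colors = [("parent", "lightgray"), ("child", "tab:green")]

fig, ax = plt.subplots()
for name, color in colors:
    for pts in groups[name]:
        ax.add_patch(Polygon(pts, closed=True, facecolor=color,
                             edgecolor="black"))
xs = [x for polys in groups.values() for pts in polys for x, _ in pts]
ys = [y for polys in groups.values() for pts in polys for _, y in pts]
ax.set_xlim(min(xs), max(xs))
ax.set_ylim(min(ys), max(ys))
plt.show()
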
def _reduce_fitness_dimension(fitness_value_line, fitness_dim, fitness_comb):

    if fitness_dim[0] is FitnessInstanceDim.seperate and fitness_dim[
            1] is FitnessFuncDim.seperate:
        return fitness_value_line
    if fitness_dim[0] is FitnessInstanceDim.single and fitness_dim[
            1] is FitnessFuncDim.seperate:
        return [
            _combine_fitness(fn_func_value, fitness_comb)
            for fn_func_value in fitness_value_line
        ]
    if fitness_dim[0] is FitnessInstanceDim.single and fitness_dim[
            1] is FitnessFuncDim.single:
        #wrap the result in a list again because the fitness value array should stay 2D
        return [
            _combine_fitness(list(ut.flatten(fitness_value_line)),
                             fitness_comb)
        ]
    if fitness_dim[0] is FitnessInstanceDim.seperate and fitness_dim[
            1] is FitnessFuncDim.single:
        #grouping the fitness values per sibling is not possible because some fitness functions can be filtered for certain siblings
        raise ValueError(
            "Separate siblings and single fitness function values are not supported."
        )
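
#A self-contained sketch of the two supported reductions; the local
#_combine_fitness stand-in assumes product combination, which may differ
#from the project's real helper:
import numpy as np

def _combine_fitness(values, comb=None):
    #assumed stand-in: product-combine a flat list of fitness values
    return float(np.prod(values))

#one fitness "line": 2 fitness funcs x 3 sibling instances
fitness_value_line = [[0.9, 0.8, 1.0],   #func 0, per sibling
                      [0.5, 0.7, 0.6]]   #func 1, per sibling

#(single, seperate): combine across instances, keep the funcs apart
per_func = [_combine_fitness(v) for v in fitness_value_line]
#(single, single): combine everything into one scalar, kept in a list
single = [_combine_fitness([x for row in fitness_value_line for x in row])]
print(per_func, single)  # approx [0.72, 0.21] [0.1512]
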
#usage snippet: assumes the surrounding project context (GMM, dtfr, ut, np
#and the data/fitness_values produced by dg.training_data_generation)
gmm = GMM(n_components=15)

regression = False
if regression:
    #filter
    data, fitness_values = dtfr.filter_fitness_and_data_training(
        data, fitness_values, fitness_funcs)
    #apply order
    fitness_values = dtfr.apply_fitness_order(fitness_values, fitness_funcs)

    #reduce
    fitness_regression = dtfr.reduce_fitness_dimension(
        fitness_values, fitness_dim, dtfr.FitnessCombination.product)
    #renormalise
    fitness_regression = dtfr.normalise_fitness(fitness_regression)
    fitness_regression = [
        list(ut.flatten(fn_value_line))
        for fn_value_line in fitness_regression
    ]

    #add the fitness data as extra columns
    train_data = np.column_stack((data, fitness_regression))

    gmm = GMM(n_components=5)
    #for regression calculate the full joint
    gmm.fit(train_data, infinite=False, min_covar=0.01)

    indices, targets = dtfr.format_target_indices_for_regression_conditioning(
        data, fitness_values, fitness_funcs, fitness_dim)

    #condition on fitness
    gmm = gmm.condition(indices, targets)
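
#What gmm.condition(indices, targets) computes is standard Gaussian
#conditioning, applied per mixture component. A numpy-only sketch of the
#formula for a single Gaussian (all names here are illustrative, not the
#project's API):
import numpy as np

mu = np.array([0.0, 1.0, 2.0])       #dims 0,1 = free, dim 2 = conditioned
cov = np.array([[1.0, 0.2, 0.4],
                [0.2, 1.0, 0.3],
                [0.4, 0.3, 1.0]])
x_idx, y_idx = np.array([0, 1]), np.array([2])
target = np.array([1.5])             #condition on dim 2 = 1.5

S_xx = cov[np.ix_(x_idx, x_idx)]
S_xy = cov[np.ix_(x_idx, y_idx)]
S_yy = cov[np.ix_(y_idx, y_idx)]

#conditional distribution of x given y = target
mu_cond = mu[x_idx] + S_xy @ np.linalg.solve(S_yy, target - mu[y_idx])
cov_cond = S_xx - S_xy @ np.linalg.solve(S_yy, S_xy.T)
print(mu_cond)   # [-0.2   0.85]
print(cov_cond)
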
def training(model, fitness_funcs, sibling_var_names, parent_var_names):

    sibling_order_sequence = training_params.sibling_order_sequence
    gmm_full = training_params.gmm_full

    sibling_data = training_params.sibling_data
    fitness_dim = training_params.fitness_dim
    n_data = training_params.n_data
    poisson = training_params.poisson

    n_iter = training_params.n_iter
    n_trial = training_params.n_trial
    n_model_eval_data = training_params.n_model_eval_data

    n_components = training_params.n_components
    min_covar = training_params.min_covar

    regression = training_params.regression

    #experiment hyperparameters:

    fitness_average_threshold = 0.95
    fitness_func_threshold = 0.98

    #this sequence indicates the order of the markov chain between siblings [1,2]-> second child depends on the first
    #the third on the first and second
    #the first child is always independent

    #note: these two assignments override the training_params values read above
    sibling_data = dg.SiblingData.first
    fitness_dim = (dtfr.FitnessInstanceDim.seperate,
                   dtfr.FitnessFuncDim.seperate)

    #the sibling order defines the size of the joint distribution that will be trained
    sibling_order = np.max(sibling_order_sequence)
    n_children = sibling_order + 1

    #gmm marginalisation [order_1,order_2,..,order_sibling_order]

    #True->train full joint
    #False->derive (marginalise) from closest higher order

    child_name = "child"
    #model to train on
    parent_node, parent_def = model
    child_nodes = parent_node.children[child_name]

    #training variables and fitness functions
    #this explicitly also defines the format of the data

    sibling_vars = [
        parent_def.children[child_name].variables[name]
        for name in sibling_var_names
    ]
    parent_vars = [parent_def.variables[name] for name in parent_var_names]
    if not all(var.stochastic() for var in sibling_vars + parent_vars):
        non_stoch_var = [
            var.name for var in sibling_vars + parent_vars
            if not var.stochastic()
        ]
        raise ValueError(
            "Only the distribution of stochastic variables can be trained on. The variables "
            + str(non_stoch_var) + " are not stochastic.")
    #check if none of the vars is deterministic

    #this explicitly also defines the format of the data
    #fitness func, order cap and regression target
    #fitness_funcs=[fn.Targetting_Fitness("Minimum distances",fn.min_dist_sb,fn.Fitness_Relation.pairwise_siblings,1,0,1,target=1),fn.Fitness("polygon overlap",fn.negative_overlap_pc,fn.Fitness_Relation.pairwise_parent_child,1,0,1)]
    #only the func order and cap is used for training

    model_evaluation = mev.ModelEvaluation(n_model_eval_data, parent_def,
                                           parent_node, parent_var_names,
                                           child_name, sibling_var_names,
                                           fitness_funcs,
                                           fitness_average_threshold,
                                           fitness_func_threshold)

    score = model_evaluation.evaluate()
    startscore = score
    delta_score = 0
    print("score before training: ", score)

    #check sibling sequence
    wrong_sequence = any(sibling_order_sequence[i] > i
                         for i in range(len(sibling_order_sequence)))
    if wrong_sequence:
        print(sibling_order_sequence)
        raise ValueError(
            "Some orders of the sibling order sequence exceed the number of previous siblings."
        )
    max_children = parent_def.variable_range(child_name)[1]
    if len(sibling_order_sequence) != max_children:
        raise ValueError(
            "The length of the sibling order sequence must equal the maximum number of children in the model."
        )
    #check marginalisation
    if len(gmm_full) != n_children:
        raise ValueError(
            "The array defining which sibling orders to train separately must "
            "have the same length as the maximum number of children. Length: " +
            str(len(gmm_full)) + ", expected: " + str(n_children) + ".")
    #do n_iter number of retrainings using previously best model
    #before iterating set the variable that will control whether a new model is an improvement
    iteration_gmm_score = score
    for iteration in range(n_iter):

        #find out the performance of the current model

        data, fitness_values = dg.training_data_generation(
            n_data,
            parent_def,
            parent_node,
            parent_var_names,
            child_name,
            sibling_var_names,
            n_children,
            fitness_funcs,
            sibling_data=sibling_data,
            poisson=poisson)

        if print_params.verbose_iter:
            model_evaluation.print_evaluation(
                fitness_values,
                iteration,
                summary=not print_params.print_fitness_bins)

        if model_evaluation.converged(fitness_values):
            return delta_score

        #combine fitness per func
        #evaluate model at the start of every iteration

        gmm_vars_retry_eval = []
        #do n trials to find a better gmm for the model
        for trial in range(n_trial):
            #calculate all full joints
            gmms = [None] * n_children
            for child_index in np.where(gmm_full)[0]:
                #generate data for each number of children
                data, fitness_values = dg.training_data_generation(
                    n_data, parent_def, parent_node, parent_var_names,
                    child_name, sibling_var_names, child_index + 1,
                    fitness_funcs, sibling_data, poisson)
                gmm = GMM(n_components=n_components,
                          random_state=setting_values.random_state)
                data, fitness_values = dtfr.filter_fitness_and_data_training(
                    data, fitness_values, fitness_funcs)
                if regression:

                    fitness_values = dtfr.apply_fitness_order(
                        fitness_values, fitness_funcs)

                    fitness_regression = dtfr.reduce_fitness_dimension(
                        fitness_values, fitness_dim,
                        dtfr.FitnessCombination.product)
                    #renormalise
                    fitness_regression = dtfr.normalise_fitness(
                        fitness_regression)
                    fitness_regression = [
                        list(ut.flatten(fn_value_line))
                        for fn_value_line in fitness_regression
                    ]

                    #add fitness data
                    train_data = np.column_stack((data, fitness_regression))

                    #for regression calculate full joint
                    gmm.fit(train_data, infinite=False, min_covar=min_covar)

                    indices, targets = dtfr.format_target_indices_for_regression_conditioning(
                        data, fitness_values, fitness_funcs, fitness_dim)

                    #condition on fitness
                    gmm = gmm.condition(indices, targets)

                else:

                    fitness_values = dtfr.apply_fitness_order(
                        fitness_values, fitness_funcs)
                    #reduce fitness to a single dimension
                    fitness_single = dtfr.reduce_fitness_dimension(
                        fitness_values, (dtfr.FitnessInstanceDim.single,
                                         dtfr.FitnessFuncDim.single),
                        dtfr.FitnessCombination.product)
                    #renormalise
                    fitness_single = dtfr.normalise_fitness(fitness_single)

                    gmm.fit(data,
                            np.array(fitness_single)[:, 0],
                            infinite=False,
                            min_covar=min_covar)
                gmms[child_index] = gmm

            #marginalise gmms, starting from the largest
            for child_index in reversed(range(n_children)):
                if not gmms[child_index]:
                    gmms[child_index] = dtfr.marginalise_gmm(
                        gmms, child_index, parent_vars, sibling_vars)
            gmm_var_name = "test" + str(iteration)
            gmm_vars = _construct_gmm_vars(gmms, gmm_var_name, parent_def,
                                           parent_node, child_name,
                                           parent_var_names, sibling_var_names)

            #the gmms are ordered the same as the children
            #use the sibling order sequence to assign gmms[i] to a child with order i
            #assign variable child i with gmm min(i,sibling_order)
            for k in range(len(child_nodes)):
                child_nodes[k].set_learned_variable(
                    gmm_vars[sibling_order_sequence[k]])

            #evaluate new model

            score = model_evaluation.evaluate()

            gmm_vars_retry_eval.append((gmm_vars, score))
            if print_params.verbose_trial:
                print()
                print("trial: ", trial, " score: ", score)

            #put original vars back
            for i in range(len(child_nodes)):
                child_nodes[i].delete_learned_variable(gmm_var_name)
        #check which gmm performed best
        max_gmm_vars = None
        for gmm_vars, gmm_score in gmm_vars_retry_eval:
            if gmm_score > iteration_gmm_score:
                max_gmm_vars = gmm_vars
                iteration_gmm_score = gmm_score
                delta_score = iteration_gmm_score - startscore
        #if it improved on the previous iteration, inject the new variables;
        #otherwise report that training didn't help
        gmm_scores = [gmm_score for gmm_vars, gmm_score in gmm_vars_retry_eval]
        print("iteration ", iteration, " trial score mean: ",
              np.mean(gmm_scores), " variance: ", np.var(gmm_scores))
        if max_gmm_vars:
            print("improved selected with score: ", iteration_gmm_score)

            for i in range(len(child_nodes)):
                child_nodes[i].set_learned_variable(
                    max_gmm_vars[sibling_order_sequence[i]])
        else:
            print("The did not improve over consecutive training iteration.")
            break
    if print_params.verbose_final_extra:
        print()
        print("final evaluation of fitness")
        data, fitness_values = dg.training_data_generation(
            n_model_eval_data,
            parent_def,
            parent_node,
            parent_var_names,
            child_name,
            sibling_var_names,
            n_children,
            fitness_funcs,
            sibling_data=sibling_data,
            poisson=False)
        model_evaluation.print_evaluation(
            fitness_values, -1, summary=not print_params.print_fitness_bins)
        print("score gain: ", str(delta_score))
    if print_params.visual and max_gmm_vars:
        for gmm_var in max_gmm_vars:
            vis.draw_1D_2D_GMM_variable_sampling(gmm_var,
                                                 training_params.title,
                                                 training_params.extra_info)
    if print_params.print_parameters_set:

        print("fitness parameters,")
        for fitn in fitness_funcs:
            print(str(fitn))
            print(",")
        print()

        print("model parameters")
        print("parent variables,", str(parent_var_names))

        print("sibling variables,", str(sibling_var_names))

        print()
    return delta_score
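
#dtfr.marginalise_gmm derives a lower-order joint from a higher-order one;
#for Gaussian mixtures, marginalising out trailing sibling dimensions just
#drops the matching entries of each component's mean and covariance, while
#the component weights stay unchanged. A numpy-only sketch of that idea
#(marginalise_component is an illustrative stand-in, not the project's API):
import numpy as np

def marginalise_component(mean, cov, keep):
    #keep only the listed dimensions of one mixture component
    keep = np.asarray(keep)
    return mean[keep], cov[np.ix_(keep, keep)]

mean = np.array([0.0, 1.0, 2.0, 3.0])   #e.g. parent dims 0,1; siblings 2,3
cov = np.eye(4) + 0.1
m, c = marginalise_component(mean, cov, keep=[0, 1])  #drop sibling dims 2,3
print(m, c.shape)  # [0. 1.] (2, 2)
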
def fitness_value_bounds(fitness_values):

    return [(np.min(list(ut.flatten(np.array(fitness_values)[:, i]))),
             np.max(list(ut.flatten(np.array(fitness_values)[:, i]))))
            for i in range(len(fitness_values[0]))]
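
#A quick check of the same computation on toy data; the generator
#expressions stand in for ut.flatten over each fitness function's column:
fitness_values = [[[0.2, 0.4], [0.9]],
                  [[0.1, 0.5], [0.7]],
                  [[0.3, 0.6], [0.8]]]   #3 data lines x 2 fitness funcs

bounds = [(min(x for line in fitness_values for x in line[i]),
           max(x for line in fitness_values for x in line[i]))
          for i in range(len(fitness_values[0]))]
print(bounds)  # [(0.1, 0.6), (0.7, 0.9)]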