Example #1
# Imports assumed by this snippet (old-style standalone Keras API); ``streams``
# is a project-local utility module providing directory helpers.
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.initializers import Orthogonal, RandomNormal, glorot_normal
def model_init(init_scheme, **flow):
    """
    Model initialization
    
    Input:
        ~ init_scheme (string) : specifies the initialization scheme to use
        ~ flow (dictionary) : flow control
        
    Returns:
        model (keras.models.Sequential) : initialized model
    """
    
    if flow['mnist']:
        input_dim = (784,)
        output_dim  = 10
    else:
        input_dim = (31,)
        output_dim  = 4
    #end
        
    print('Model ``{}`` initialization'.format(init_scheme))
    
    model = Sequential()
    
    if init_scheme == 'orth':
        weight_init = Orthogonal(gain = 1.0, seed = flow['seed'])
    elif init_scheme == 'normal':
        weight_init = RandomNormal(mean = 0.0, stddev = 0.1, seed = flow['seed'])
    elif init_scheme == 'glorot':
        weight_init = glorot_normal(seed = flow['seed'])
    else:
        raise ValueError('Unrecognised init_scheme: {}'.format(init_scheme))
    #end
    
    bias_init = RandomNormal(mean = 0.0, stddev = 0.1, seed = flow['seed'])
    
    model.add(Dense(flow['network'][flow['seed']][0], activation = 'relu', input_shape = input_dim,
                    kernel_initializer = weight_init,
                    bias_initializer   = bias_init))
    
    for hidden_units in flow['network'][flow['seed']][1:]:
        model.add(Dense(hidden_units, activation = 'relu',
                            kernel_initializer = weight_init,
                            bias_initializer   = bias_init))
    #end
    
    model.add(Dense(output_dim, activation = 'softmax',
                    kernel_initializer = weight_init,
                    bias_initializer = bias_init))
    
    sgd = keras.optimizers.SGD(lr = 0.01, decay = 1e-6, momentum = 0.6, nesterov = True)
    model.compile(loss = 'categorical_crossentropy', optimizer = sgd, metrics = ['accuracy'])
    
    path_model_initialized = flow['path_output'] + r'\init'
    streams.check_create_directory(path_model_initialized)
    model.save(path_model_initialized + r'\model_init.h5')
    
    return model
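
# A minimal usage sketch (values assumed, not from the source): ``flow`` must
# carry the keys read above -- 'mnist', 'seed', 'network' (a dict mapping the
# seed to the list of layer widths) and 'path_output'.
flow = {'mnist': False,
        'seed': 618,
        'network': {618: [20, 10]},
        'path_output': r'..\Results\orth'}
model = model_init('orth', **flow)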
Example #2
def model_initialisation(N_input, N_classes, initialiser, 
                         seed_value, path_init):
    """
    Model initialization according to a scheme given as input
    
    Input:
        ~ N_input, N_classes        Integers
        ~ initialiser               string, one of 'normal', 'orth', 'glorot', 'zeros'
        ~ seed_value                seed for reproducibility, it specifies the
                                    directory in which to store the results
        ~ path_init                 path to store the initialised model
            
    Returns:
        ~ model                     keras.Models.Sequential instance. A linear stack
                                    of layers. Parameters are initialised according
                                    to the scheme specified
                                    
    Note that the tasks to be performed by the network are straightforward. A
    comparably simple model and algorithmic setup suffices to capture the problem complexity
    """
    
    print("\nModel initialisation. Scheme:")
    
    model = Sequential()
    
    if (initialiser == 'orth'):
        print("Orthogonal weights initialisation")
        weights_initializer = Orthogonal(gain = 1.0, seed = seed_value)
    elif (initialiser == 'normal'):
        print("Normal weights initialisation")
        weights_initializer = RandomNormal(mean = 0.0,
                                          stddev = 0.1,
                                          seed = seed_value)
    elif (initialiser == 'glorot'):
        print("Glorot weights initialisation")
        weights_initializer = glorot_normal(seed = seed_value)
    elif (initialiser == 'zeros'):
        weights_initializer = Zeros()
    else:
        raise ValueError('No initialiser match: {}'.format(initialiser))
    #end
    
    model.add(Dense(input_dim = N_input, units = 20,
                kernel_initializer = weights_initializer,
                bias_initializer = RandomNormal(mean = 0.0, 
                                                stddev = 0.1, 
                                                seed = seed_value),
                activation = 'relu'))
    model.add(Dense(input_dim = 20, units = 10,
                kernel_initializer = weights_initializer,
                bias_initializer = RandomNormal(mean = 0.0, 
                                                stddev = 0.1, 
                                                seed = seed_value),
                activation = 'relu'))
                
    model.add(Dense(units = N_classes,
                kernel_initializer = weights_initializer,
                bias_initializer = RandomNormal(mean = 0.0,
                                                stddev = 0.1,
                                                seed = seed_value),
                activation = 'softmax'))
                
    
    """
    The optimization algorithm details are defined here 
    once for all, the model is returned with these details
    embedded yet. Hereafter, in the actual training stage
    it is ready to use with the
    
        model.fit(args)
        
    method
    """
                
    sgd = keras.optimizers.SGD(lr = 0.01, decay = 1e-6, 
                               momentum = 0.6, nesterov = True)
    
    model.compile(loss = 'categorical_crossentropy', 
                  optimizer = sgd, metrics = ['accuracy'])
                
    streams.check_create_directory(path_init + r'\init')
    model.save(path_init + r'\init' + r'\model_init.h5')
    return model
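
# Hypothetical call, mirroring the 31-feature / 4-class synthetic setting of
# Example #1; the seed and the path are illustrative.
model = model_initialisation(N_input = 31, N_classes = 4,
                             initialiser = 'glorot', seed_value = 618,
                             path_init = r'..\Results\glorot')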
Example #3
def model_train_multitask(path_save_model,
                          dataset_id,
                          split_fraction,
                          plot, init_scheme):
                          
    """
    *** D E P R E C A T E D ***
    
    In the original spirit of the work, multitask training was thought to
    be an interesting feature to analyse. Following the work by Kashtan
    and Alon, ``Spontaneous evolution of modularity and network motifs'',
    PNAS September 27, 2005 102 (39) 13773-13778; https://doi.org/10.1073/pnas.0503610102,
    training was performed periodically on both data sets, in order to observe
    a hypothetical topological response encoding commonalities and/or differences.
    
    
    In the multitask training, the dataset_id flag is set to 'mvg'. The two data sets
    are fetched and training is performed cyclically on both.
    
    The call signature and return type are the same as in the model_training
    function above, modulo differences in the labelling conventions
    """

    print("\n\nMultitask training.\n\n")
    
    model = load_model(path_save_model + r'\init\model_init.h5')
    
    X_1,Y_1 = load_data(r'DataSets/TreeLev2_DS_list.pkl')
    X_2,Y_2 = load_data(r'DataSets/Clusters_DS_list.pkl')
    
    Xtrain_1, Xtest_1, Ytrain_1, Ytest_1 = train_test_split(
            X_1, Y_1, test_size = split_fraction, random_state = 20)
    
    Xtrain_2, Xtest_2, Ytrain_2, Ytest_2 = train_test_split(
            X_2, Y_2, test_size = split_fraction, random_state = 20)
    
    es1 = EarlyStopping(monitor='val_acc', 
                        mode='auto', patience = 30, verbose = 0)
    es2 = EarlyStopping(monitor='val_loss', 
                        mode='auto', patience = 20, verbose = 0)
   
    
    
    params = model.get_weights()
    params_pre = params
    
    streams.check_create_directory(path_save_model + r'\images')
    path_save_pic = path_save_figs + r'\{}\{}_'.format(init_scheme,dataset_id)
    if (plot):
        plotNet = npl.plotNet(params_pre, path_save_pic, 
                              trained = False, asGraph = False)
        plotNet.plotNetFunction()
    #end
    
    for I in range(10):
        
        
        model.set_weights(params)
        model.fit(Xtrain_1, Ytrain_1,
            validation_split = 0.1, epochs = 100, 
            verbose = 0, callbacks = [es1, es2])
        if (I % 2 == 0):
            print("\nSuperepoch: {}, goal 1\n".format(I))
            print("Model evaluation on test data: loss and accuracy : ",
            model.evaluate(Xtest_1,Ytest_1, verbose = 2))
        params = model.get_weights()
        
        
        model.set_weights(params)
        model.fit(Xtrain_2, Ytrain_2,
            validation_split = 0.1, epochs = 100, 
            verbose = 0, callbacks = [es1, es2])
        if (I % 2 == 0):
            print("\nSuperepoch: {}, goal 2".format(I))
            print("Model evaluation on test data: loss and accuracy : ",
            model.evaluate(Xtest_2,Ytest_2, verbose = 2))
        params_post = model.get_weights()
        params = params_post
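
# Deprecated, so shown only for completeness; a call would look like this
# (arguments illustrative), alternating training on the two data sets:
# model_train_multitask(r'..\Results\orth', 'mvg', 0.2,
#                       plot = False, init_scheme = 'orth')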
Example #4
def model_training(path_save_model,
                   dataset_id, 
                   split_fraction,
                   plot, init_scheme):
                   
    """
    Proper training stage.
    
    As usual, proper directories are checked and if not present created,
    in order to store the trained model file in a devoted space.
    
    Input:
        ~ path_save_model           string, where to save the model (.h5)
        ~ dataset_id                string, environment specifier
        ~ split_fraction            float, the fraction of held-out
                                    samples to evaluate the model with once trained
        ~ plot                      dict of bool flags, specifying whether
                                    or not to plot the relevant figures
        ~ init_scheme               string, specifies the initialisation scheme used
        
    Returns:
        ~ model                     This time the keras.Models.Sequential type, instantiated
                                    in the model_initialisation function, is returned with
                                    the parameters adjusted according to the task learned
        ~ history.history['acc']    list, contains the values of the accuracy for each training
                                    epoch, so that it is possible to plot the learning profiles,
                                    if needed
                                    
    Note again that since the tasks are straightforward, the only anti-overfit
    measure adopted is early stopping.
    """
    
    print("\nModel Training with " + dataset_id + " data set.\n")
    
    model = load_model(path_save_model + r'\init\model_init.h5')
    params_pre = model.get_weights()
    
    streams.check_create_directory(path_save_model + r'\images')
    path_save_pic = path_save_figs + r'\{}\{}_'.format(init_scheme,dataset_id)
    if (plot['network']):
        plotNet = npl.plotNet(params_pre, path_save_pic, 
                              trained = False, asGraph = False)
        plotNet.plotNetFunction()
    #end
    
    if (dataset_id == 'tree'):
        X,Y = load_data(r'DataSets/TreeLev2_DS_list.pkl')
    elif (dataset_id == 'clus'):
        X,Y = load_data(r'DataSets/Clusters_DS_list.pkl')
    #end
    
    Xtrain, Xtest, Ytrain, Ytest = train_test_split(
            X, Y, test_size = split_fraction, random_state = 20)
    
    es1 = EarlyStopping(monitor='val_acc', 
                        mode='auto', patience = 30, verbose = 0)
    es2 = EarlyStopping(monitor='val_loss', 
                        mode='auto', patience = 20, verbose = 0)
    
    
    history = model.fit(Xtrain, Ytrain, 
                        validation_split = 0.1, 
                        epochs = 100, verbose = 0, 
                        callbacks = [es1,es2])
    
    if (plot['training']):
        plt.figure(figsize=(10,4))
        plt.subplot(1,2,1)
        plt.plot(history.history['acc'])
        plt.plot(history.history['val_acc'])
        plt.title('Model accuracy')
        plt.ylabel('Accuracy')
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Validation'], loc='lower right')
        
        plt.subplot(1,2,2)
        plt.plot(history.history['loss'])
        plt.plot(history.history['val_loss'])
        plt.title('Model loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Validation'], loc='upper right')
        plt.savefig(path_save_pic + "performance.png")
        plt.show()
    #end
    
    params_post = model.get_weights()    
    
    print("Model evaluation on test data: loss and accuracy\n",
        model.evaluate(Xtest,Ytest, verbose = 2))
    
    return model, history.history['acc']
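
# Illustrative call (path and split assumed): train on the 'tree' environment
# with 20% of the samples held out; the 'plot' keys match those read above.
# model, acc = model_training(r'..\Results\orth', 'tree', 0.2,
#                             plot = {'network': False, 'training': True},
#                             init_scheme = 'orth')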
Example #5
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("ticks")


"""
For the sake of simplicity, the path to the figures folder
is set in a global variable. The user is free to specify
whatever directory is thought useful
"""


path_save_figs =  r'../figures'
streams.check_create_directory(path_save_figs)


def model_initialisation(N_input, N_classes, initialiser, 
                         seed_value, path_init):
    """
    Model initialization according to a scheme given as input
    
    Input:
        ~ N_input, N_classes        Integers
        ~ initialiser               string, among 'normal', 'orth', 'glorot'
        ~ seed_value                seed for reproducibility, it specifies the
                                    directory in which to store the results
        ~ path_init                 path to store the initialised model
            
    Returns:
Example #6
detail = 'mtm'


#seeds = [3,5,6]
seeds = [618]


initialisations = ['orth','normal','glorot'] # U ['zeros'] ?
#initialisations = ['orth']
#datasets = ['init','tree','clus','mvg']
datasets = ['init','tree','clus']
#datasets = ['tree']

path_in_results = os.getcwd() + r'\Results'
path_save_figs = r'<absolute path where figures are to be saved>'  # *** fill in ***
streams.check_create_directory(path_save_figs)


"""
The idea is to loop over:

    ~ seeds 
        
        ~ initialisation schemes
        
in such a way to execute the program once of all the possible configuration
Apposite directories are created, if not already present, on-fly, in such a way
to store the informations generated by the program execution.

Images are stored directly in the directory in which the written project 
is kept and worked. 
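
# Minimal sketch of the loop described above; ``run_experiment`` is a
# hypothetical stand-in for the per-configuration work (initialise, train,
# analyse), everything else comes from the variables defined in this snippet.
for seed in seeds:
    for init_scheme in initialisations:
        path_out = path_in_results + r'\seed_{}\{}'.format(seed, init_scheme)
        streams.check_create_directory(path_out)
        # run_experiment(seed, init_scheme, path_out)  # hypothetical helper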
Example #7
def spectrum_discretize(bins_edges, dataset, init_scheme, **flow):
    """
    This is the core of the procedure: the keras model is turned to
    a graph, via the ``proGraphDataStructure'' module functionalities.
    
    Input:
        ~ bins_edges (list of floats) : see above
        ~ dataset (string) : see above
        ~ init_scheme (string) : see above
        ~ flow (dictionary) : see above
                                     
    Returns:
        ~ edges_df (pandas.DataFrame) : contains the edge metadata,
                                        that is, the nodes the edge links, the category associated
                                        with each edge, and the connection strength. The category
                                        information is used to classify edges as strongly positive
                                        or negative, mildly positive or negative, or negligible.
    """

    print('\nWeights spectrum discretisation of ' + dataset + ' domain')

    model = load_model(flow['path_output'] +
                       r'\{}\model_{}.h5'.format(dataset, dataset))
    path_save_figures = flow['path_output'] + r'\_Figures'

    graph = graphds.proGraph(model)
    edges = graph.GetEdges()
    edges_df = pd.DataFrame.from_dict(edges,
                                      orient='index',
                                      columns=['edge', 'src', 'trg', 'param'])
    weights = np.asarray(edges_df['param'])
    if flow['plot']['preprocess']:
        spectrum_split_plot(weights, path_save_figures, dataset, bins_edges)
    #end

    edges_df = parameters_categories(edges_df, bins_edges)
    """
    NOTE: owing to the choice of having categories
        ~ to remove
        ~ mildly positive/negative
        ~ positive
        ~ negative
    and owing to the fact that the histogram exported by the bins_for_scheme function
    are five, this modification is in order, that is
        ~ the category 4 contains mildly positive, the is set to 2, which
        ~ is the category that already contains mildy negative values
        ~ category 3 contains null values, and is set to 4, the category that
          is then removed
        ~ category 5 contains positive value, but now category 3 has been set to 3 and
          category 4 has been set to 2, then it remains to set cat 5 to 3, that of positive
          values
    category 1 and 2, resp. negative and mildly negative, remain untouched.
    """

    edges_df.loc[edges_df['cats'] == 4, 'cats'] = 2
    edges_df.loc[edges_df['cats'] == 3, 'cats'] = 4
    edges_df.loc[edges_df['cats'] == 5, 'cats'] = 3

    edges_df = edges_df[edges_df['cats'] != 4]
    df_copy = edges_df
    edges_df = edges_df[['src', 'trg', 'cats']]

    # _edges = {'df_{}'.format(dataset) : edges_df, 'values_{}'.format(dataset) : weights_values}

    streams.check_create_directory(flow['path_output'] +
                                   r'\{}'.format(dataset))

    if flow['write_graph']:
        print('Writing Graph File\n')

        filename = flow['path_output'] + r'\{}\_{}_{}_Graph.txt'.format(
            dataset, dataset, flow['weighted_graph'])
        np.savetxt(filename, edges_df.values, fmt='%d')
    #end

    # return edges_df
    return df_copy
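
# Tiny self-contained check of the remapping above (illustrative): applied in
# this order, the three assignments send 4 -> 2, 3 -> 4 and 5 -> 3.
import pandas as pd
demo = pd.DataFrame({'cats': [1, 2, 3, 4, 5]})
demo.loc[demo['cats'] == 4, 'cats'] = 2
demo.loc[demo['cats'] == 3, 'cats'] = 4
demo.loc[demo['cats'] == 5, 'cats'] = 3
print(demo['cats'].tolist())   # [1, 2, 4, 2, 3]: the original 3 now awaits removal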
Example #8
path_results, path_summary_plots, \
    path_latex_tables, path_serialized_dataframes = \
        streams.hierarchy(flow['seed'], **flow)

flow.update({'path_splots': path_summary_plots})

for init_scheme in initializations:
    """
    PREPROCESS: Initialization, training, graph construction
    _____________________________________________________________________________________________
    """

    path_results_initscheme = path_results + r'\{}'.format(init_scheme)
    path_save_figures = path_results_initscheme + r'\_Figures'
    streams.check_create_directory(path_results_initscheme)
    streams.check_create_directory(path_save_figures)

    flow.update({'path_figures': path_save_figures})
    flow.update({'path_output': path_results_initscheme})
    flow.update({'path_serialize': path_serialized_dataframes})

    if flow['initialize']:
        train.model_init(init_scheme, **flow)
    #end

    if flow['train']:
        for dataset in datasets[1:]:
            model, accuracy = train.model_train('', dataset, init_scheme,
                                                **flow)
        #end
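
# For context, a plausible ``flow`` dictionary for this driver; the keys are
# inferred from their uses across these examples, the values are illustrative:
# flow = {'seed': 618, 'mnist': False,
#         'network': {618: [20, 10]},
#         'initialize': True, 'train': True,
#         'plot': {'preprocess': True, 'train': True},
#         'write_graph': True, 'weighted_graph': 'w',
#         'save_model': True}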
Example #9
def spectrum_discretize(path_in_dir, dataset_id, plot, weighted_graph,
                        write_file, init_scheme, bins_edges):
    """
    This is the core of the procedure: the keras model is turned to
    a graph, via the ``proGraphDataStructure'' module functionalities.
    
    Input:
        ~ path_in_dir               same as above
        ~ dataset_id                string, as above
        ~ plot                      dict, contains bools to instruct the program flow
                                    about whether display graphics or not
        ~ weighted_graph            char, instructs the program flow whether the
                                    graph is treated as weighted or not
        ~ write_file                char, instructs the program flow whether to 
                                    write the graph structure to file or not
        ~ init_scheme               as above
        ~ bins_edges                list of floats, where to place the subdivisions
                                    among categories
                                    
    Returns:
        ~ EdgesDF                   pandas.DataFrame, contains the edge metadata,
                                    that is, the nodes the edge links, the category associated
                                    with each edge, and the connection strength. The category
                                    information is used to classify edges as strongly positive
                                    or negative, mildly positive or negative, or negligible.
    """

    print("\nWeights specturm discretisation of " + dataset_id + " domain")

    model = load_model(path_in_dir +
                       r'\{}\model_{}.h5'.format(dataset_id, dataset_id))
    streams.check_create_directory(path_in_dir + r'\images')
    path_save_pic = path_save_figs + r'\{}\{}'.format(init_scheme, dataset_id)

    graph = pg.proGraph(model)

    Edges = graph.GetEdges()

    EdgesDF = pd.DataFrame.from_dict(Edges,
                                     orient="index",
                                     columns=["edge", "param"])

    weights = np.asarray(EdgesDF["param"])

    ssp.spectrum_split_plot(weights, path_save_pic, dataset_id, bins_edges)

    EdgesDF = ssp.CategoriseWeightsBiases(EdgesDF, bins_edges)
    """
    NOTE: owing to the choice of having categories
        ~ to remove
        ~ mildly positive/negative
        ~ positive
        ~ negative
    and owing to the fact that the histogram exported by the bins_for_scheme function
    are five, this modification is in order, that is
        ~ the category 4 contains mildly positive, the is set to 2, which
        ~ is the category that already contains mildy negative values
        ~ category 3 contains null values, and is set to 4, the category that
          is then removed
        ~ category 5 contains positive value, but now category 3 has been set to 3 and
          category 4 has been set to 2, then it remains to set cat 5 to 3, that of positive
          values
    category 1 and 2, resp. negative and mildly negative, remain untouched.
    """

    EdgesDF.loc[EdgesDF['cats'] == 4, 'cats'] = 2
    EdgesDF.loc[EdgesDF['cats'] == 3, 'cats'] = 4
    EdgesDF.loc[EdgesDF['cats'] == 5, 'cats'] = 3

    streams.check_create_directory(path_in_dir + r'\{}'.format(dataset_id))
    filename = path_in_dir + r'\{}\{}_{}_Graph.txt'.format(
        dataset_id, dataset_id, weighted_graph)

    if (write_file == "Y" or write_file == "y"):
        print("Writing Graph File\n")

        AdjLists = graph.GetAdjLists()
        with open(filename, 'w') as f:
            for i in AdjLists:
                for j in range(len(AdjLists[i])):

                    l = AdjLists[i][j]
                    #                    tmp = EdgesDF.loc[EdgesDF["edge"] == (i,l)]

                    # we preventively EXCLUDE the elements which have category 4
                    # that is, weak connections
                    if (int(EdgesDF[EdgesDF['edge'] == (i, l)]['cats']) != 4):

                        if (weighted_graph == "u" or weighted_graph == "U"):
                            f.write("%s %s %s" % (i - 1, l - 1, 1))
                            f.write("\n")
                        else:
                            f.write("%s %s " % (i - 1, l - 1))
                            """
                            UNCOMMENT to account for nodes values (colors)
                            """
                            #                        f.write("%s %s %s " % (int(NodesDF.loc[i,"cats"]),
                            #                                               int(NodesDF.loc[l,"cats"]),
                            #                                               int(tmp.at[tmp.index[0],"cats"])))
                            cat = int(EdgesDF[EdgesDF['edge'] == (i,
                                                                  l)]['cats'])
                            f.write("%s " % (cat))
                            f.write("\n")
                        #end
                    #end
                #end
            #end
    #end

    return EdgesDF


#end
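
# The graph file written above holds one edge per line, with 0-based node ids:
# ``src trg 1`` for an unweighted graph, ``src trg cat`` otherwise. An
# illustrative excerpt of the weighted format:
#
#   0 31 3
#   1 32 2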
Example #10
def model_train(path_results, dataset, init_scheme, **flow):
    """
    Model training with Stochastic Gradient Descent. Note that the 
    batch size is a tunable parameter.
    
    Input:
        ~ path_results (string) : main directory, from which the directory
                                  hierarchy stems
        ~ dataset (string) : see above
        ~ init_scheme (string) : see above
        
    Returns:
        ~ model (keras.models.Sequential) : trained model
        ~ history.history['acc'] : training accuracy. It is needed for the 
                                   efficacy plots
    """
    
    if flow['mnist']:
        Xtrain,Ytrain, Xtest,Ytest = load_mnist_data()
        batch_size = 128
    else:
        Xtrain,Ytrain, Xtest,Ytest = load_synth_data(dataset)
        batch_size = 20
    #end
        
    print('Model train with {} data set'.format(dataset))
    
    if path_results == '':
        model = load_model(flow['path_output'] + r'\init\model_init.h5')
    else:
        model = load_model(path_results + r'\{}\init\model_init.h5'.format(init_scheme))
    #end
    
    es1 = EarlyStopping(monitor='val_acc', 
                        mode='auto', patience = 30, verbose = 0)
    es2 = EarlyStopping(monitor='val_loss', 
                        mode='auto', patience = 20, verbose = 0)
    
    history = model.fit(Xtrain, Ytrain, batch_size = batch_size,
                        validation_split = 0.1, epochs = 20, verbose = 0,
                        callbacks = [es1,es2])
    
    
    path_save_model = flow['path_output'] + r'\{}'.format(dataset)
    streams.check_create_directory(path_save_model)
    score = model.evaluate(Xtest,Ytest, verbose = 2)
    scores_log = 'Test loss and accuracy : {:.6f} ; {:.6f} '.format(score[0], score[1])
    
    with open(path_save_model + r'\train_log.txt','w') as f:
        f.write('Training log\n')
        f.write(scores_log)
        print('Saved on log:\n' + scores_log)
    #end
    
    if flow['plot']['train']:
        plt.figure(figsize=(10,4))
        plt.subplot(1,2,1)
        plt.plot(history.history['acc'])
        plt.plot(history.history['val_acc'])
        plt.title('Model accuracy')
        plt.ylabel('Accuracy')
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Validation'], loc='lower right')
        
        plt.subplot(1,2,2)
        plt.plot(history.history['loss'])
        plt.plot(history.history['val_loss'])
        plt.title('Model loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Validation'], loc='upper right')
        plt.savefig(flow['path_figures'] + r'\{}_{}_performance.png'.format(init_scheme, dataset),
                    dpi=300, bbox_inches = 'tight')
        plt.show()
    #end
    
    if flow['save_model']:
        model.save(path_save_model + r'\model_{}.h5'.format(dataset))
    #end
    
    return model,history.history['acc']
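
# End-to-end sketch (values assumed): initialise as in Example #1, then train
# on the synthetic 'tree' data set; ``flow`` needs the extra keys read here.
# flow.update({'plot': {'train': True}, 'save_model': True,
#              'path_figures': flow['path_output'] + r'\_Figures'})
# model, acc_profile = model_train('', 'tree', 'orth', **flow)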