def create_and_train(num_epochs=1000, batch_size=100, epoch_batches=10, network_params_file_in=None,
                     network_params_file_out=None,
                     train_file=None,
                     network_fun=nf.create_network_10_speakers, with_validation=True):
    # load training data
    with open(train_file, 'rb') as f:
        (X, y, speaker_names) = pickle.load(f)

    # create symbolic theano variables
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    margin = T.scalar('margin')

    # create network
    network = network_fun(input_var)
    if network_params_file_in is not None:
        all_param_values = pickler.load(network_params_file_in)
        lasagne.layers.set_all_param_values(network, all_param_values)

    train_fn, val_fn = create_loss_functions_kl_div(input_var, network, target_var, margin)

    # disable validation if requested
    if not with_validation:
        val_fn = None

    # Train network
    train(X, y, num_epochs, train_fn, val_fn, SpectTrainBatchIterator(batch_size, epoch_batches, config),
          SpectValidBatchIterator(batch_size, epoch_batches, config))

    # Save the trained network parameters if an output file is given
    if network_params_file_out is not None:
        pickler.save(lasagne.layers.get_all_param_values(network), network_params_file_out)
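
A minimal usage sketch for the function above; both pickle file names are hypothetical placeholders, and only the nf.create_network_10_speakers factory comes from the signature itself:

create_and_train(num_epochs=1000,
                 batch_size=100,
                 epoch_batches=10,
                 train_file='train_data_10_speakers.pickle',
                 network_params_file_out='net_10_speakers_params.pickle',
                 network_fun=nf.create_network_10_speakers,
                 with_validation=True)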
Example #2
def analyse_results(network_name, checkpoint_names, set_of_predicted_clusters,
                    set_of_true_clusters, embedding_numbers, set_of_times,
                    set_of_utterance_embeddings):
    """
    Analyses each checkpoint with the values of set_of_predicted_clusters and set_of_true_clusters.
    After the analysis the result are stored in the Pickle network_name.pickle and the best Result
    according to min MR is stored in network_name_best.pickle.
    :param network_name: The name for the result pickle.
    :param checkpoint_names: A list of names from the checkpoints. Later used as curvenames,
    :param set_of_predicted_clusters: A 2D array of the predicted Clusters from the Network. [checkpoint, clusters]
    :param set_of_true_clusters: A 2d array of the validation clusters. [checkpoint, validation-clusters]
    :param embedding_numbers: A list which represent the number of embeddings in each checkpoint.
    :param set_of_times: A 2d array of the time per utterance [checkpoint, times]
    """

    logger = get_logger('analysis', logging.INFO)
    logger.info('Run analysis')
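    # metric_sets[metric][checkpoint] collects one result per metric and checkpoint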
    metric_sets = [[None] * len(set_of_predicted_clusters)
                   for _ in range(len(metric_names))]

    for index, predicted_clusters in enumerate(set_of_predicted_clusters):
        checkpoint = checkpoint_names[index]
        logger.info('Analysing checkpoint: ' + checkpoint)

        # Check if checkpoint is already stored
        analysis_pickle = get_results_intermediate_analysis(checkpoint)

        if os.path.isfile(analysis_pickle):
            (metric_results, eer_result) = load(analysis_pickle)
        else:
            metric_results = _calculate_analysis_values(
                predicted_clusters, set_of_true_clusters[index],
                set_of_times[index])
            eer_result = _calculate_eer_result(
                set_of_utterance_embeddings[index])
            save((metric_results, eer_result), analysis_pickle)

        logger.info("\tEER: {}".format(round(eer_result, 5)))

        for m, metric_result in enumerate(metric_results):
            metric_sets[m][index] = metric_result

    _write_result_pickle(network_name, checkpoint_names, metric_sets,
                         embedding_numbers)
    _save_best_results(network_name, checkpoint_names, metric_sets,
                       embedding_numbers)

    logger.info('Clearing intermediate result checkpoints')

    for checkpoint in checkpoint_names:
        analysis_pickle = get_results_intermediate_analysis(checkpoint)
        test_pickle = get_results_intermediate_test(checkpoint)

        if os.path.exists(analysis_pickle):
            os.remove(analysis_pickle)

        if os.path.exists(test_pickle):
            os.remove(test_pickle)

    logger.info('Analysis done')
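
A hedged call sketch showing the expected argument shapes; every value below is an illustrative placeholder, not data from the repository:

analyse_results(network_name='my_network',
                checkpoint_names=['ckpt_0100', 'ckpt_0200'],
                set_of_predicted_clusters=predicted,        # [checkpoint][clusters]
                set_of_true_clusters=true_clusters,         # [checkpoint][validation-clusters]
                embedding_numbers=[40, 40],                 # embeddings per checkpoint
                set_of_times=times,                         # [checkpoint][times]
                set_of_utterance_embeddings=embeddings)     # per-checkpoint embeddings for the EER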
Example #3
def write_result_pickle(network_name, checkpoint_names, set_of_mrs, set_of_homogeneity_scores,
                        set_of_completeness_scores, number_of_embeddings, algorithm):
    logger = get_logger('analysis', logging.INFO)

    result_pickle = get_result_pickle(network_name + '_' + algorithm)
    save((checkpoint_names, set_of_mrs, set_of_homogeneity_scores, set_of_completeness_scores,
          number_of_embeddings), result_pickle)
    logger.info('Write result pickle to ' + str(result_pickle))
Example #4
def write_result_pickle(network_name, checkpoint_names, set_of_mrs,
                        set_of_homogeneity_scores, set_of_completeness_scores,
                        number_of_embeddings):
    logger = get_logger('analysis', logging.INFO)
    logger.info('Write result pickle')
    save((checkpoint_names, set_of_mrs, set_of_homogeneity_scores,
          set_of_completeness_scores, number_of_embeddings),
         get_result_pickle(network_name))
Example #5
    def train_network(self):
        mixture_count = self.config.getint('gmm', 'mixturecount')
        X, y, speaker_names = load(
            get_speaker_pickle(self.config.get('train', 'pickle') + '_mfcc'))
        model = []

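        # Fit one diagonal-covariance GMM per speaker on its MFCC frames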
        for i in range(len(X)):
            features = X[i]
            gmm = mixture.GaussianMixture(n_components=mixture_count,
                                          covariance_type='diag',
                                          n_init=1)
            gmm.fit(features.transpose())
            speaker = {'mfccs': features, 'gmm': gmm}
            model.append(speaker)

        save(model, get_experiment_nets(self.name))
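
Once trained, the saved model (a list of {'mfccs': ..., 'gmm': ...} dicts) can be scored against new MFCC frames. This identification sketch is an assumption derived from the model structure above, not code from the repository:

import numpy as np

def identify_speaker(model, mfcc_features):
    # mfcc_features uses the same [coefficients, frames] layout as the training data,
    # so it is transposed before scoring. score() returns the average log-likelihood
    # per frame; the speaker whose GMM explains the frames best wins.
    scores = [speaker['gmm'].score(mfcc_features.transpose()) for speaker in model]
    return int(np.argmax(scores))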
Example #6
    def create_and_train(self, training_data):
        # Load training data
        x, y, speaker_names = load(training_data)

        # Create network
        net = create_net(self.create_paper(x.shape[1]))

        # Set new batch iterator
        net.batch_iterator_train = SegmentBatchIterator(batch_size=128)
        net.batch_iterator_test = SegmentBatchIterator(batch_size=128)
        net.train_split = TrainSplit(eval_size=0)

        # Train the network
        self.logger.info("Fitting...")
        net.fit(x, y)

        # Comments kept from the old spectrogram_cnn_100 implementation; do not delete yet in case they are needed later
        # net.load_params_from('../data/experiments/paper/networks/net_100_81_not_reynolds.pickle');
        # net.save_params_to('../../data/experiments/paper/networks/net_100_81_not_reynolds.pickle');
        # network_helper.save(net, '../../data/experiments/paper/networks/net_100_81_not_reynolds.pickle')
        save(net, self.net_path)
Example #7
def create_and_train(network_file, train_file, out_file):
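    # Load a pretrained convolutional front-end as a fixed feature extractor; its
    # 1000-dim outputs feed a separate KL-divergence clustering network that has
    # no convolution layers of its own.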
    print("Loading static convolution...")
    x = T.tensor4('x')
    get_conv_output = prepare(network_file, x)
    print("Static convolution loaded!")

    print("Create vanilla network...")
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    margin = T.scalar('margin')
    network = network_factory.create_network_KL_clustering_no_convolution(
        input_var, input_size=1000, output_size=100)
    train_fn, val_fn = create_loss_functions_kl_div(input_var, network,
                                                    target_var, margin)
    train_batch_iterator = SpectWithSeparateConvTrainBatchIterator(
        batchsize=100,
        batches_per_epoch=10,
        config=config,
        input_dim=1000,
        get_conv_output=get_conv_output)
    valid_batch_iterator = SpectWithSeparateConvValidBatchIterator(
        batchsize=100,
        batches_per_epoch=10,
        config=config,
        input_dim=1000,
        get_conv_output=get_conv_output)
    print("Vanilla network created!")

    with open(train_file, 'rb') as f:
        (X, y, speaker_names) = pickle.load(f)
    clustering_network.train(X,
                             y,
                             num_epochs=1000,
                             train_fn=train_fn,
                             val_fn=val_fn,
                             train_iterator=train_batch_iterator,
                             validation_iterator=valid_batch_iterator)

    pickler.save(layers.get_all_param_values(network), out_file)
Example #8
def _write_result_pickle(network_name, checkpoint_names, metric_sets,
                         number_of_embeddings):
    logger = get_logger('analysis', logging.INFO)
    logger.info('Write result pickle')
    save((checkpoint_names, metric_sets, number_of_embeddings),
         get_result_pickle(network_name))