Example #1
    def train(self):
        ConsoleLogger.status("Running the experiment called '{}'".format(
            self._name))
        ConsoleLogger.status('Starting to train the model')
        self._trainer.train()
        ConsoleLogger.success(
            "Successfully ran the experiment called '{}'".format(self._name))
Example #2
    def evaluate(self, evaluation_options):
        ConsoleLogger.status("Running the experiment called '{}'".format(
            self._name))
        ConsoleLogger.status('Starting to evaluate the model')
        self._evaluator.evaluate(evaluation_options)
        ConsoleLogger.success(
            "Successfully ran the experiment called '{}'".format(self._name))
Example #3
    def test_global_conditioning(self):
        configuration = None
        with open('../../configurations/vctk_features.yaml', 'r') as configuration_file:
            configuration = yaml.load(configuration_file, Loader=yaml.FullLoader)
        device_configuration = DeviceConfiguration.load_from_configuration(configuration)
        data_stream = VCTKSpeechStream(configuration, device_configuration.gpu_ids, device_configuration.use_cuda)
        (x_enc, x_dec, speaker_id, _, _) = next(iter(data_stream.training_loader))

        ConsoleLogger.status('x_enc.size(): {}'.format(x_enc.size()))
        ConsoleLogger.status('x_dec.size(): {}'.format(x_dec.size()))

        x = x_dec.squeeze(-1)
        global_conditioning = GlobalConditioning.compute(
            speaker_dic=data_stream.speaker_dic,
            speaker_ids=speaker_id,
            x_one_hot=x,
            expand=False
        )
        self.assertEqual(global_conditioning.size(), torch.Size([1, 128, 1]))
        ConsoleLogger.success('global_conditioning.size(): {}'.format(global_conditioning.size()))

        expanded_global_conditioning = GlobalConditioning.compute(
            speaker_dic=data_stream.speaker_dic,
            speaker_ids=speaker_id,
            x_one_hot=x,
            expand=True
        )
        self.assertEqual(expanded_global_conditioning.size(), torch.Size([1, 128, 7680]))
        ConsoleLogger.success('expanded_global_conditioning.size(): {}'.format(expanded_global_conditioning.size()))
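The assertions above only pin down shapes: a (1, 128, 1) conditioning tensor per utterance, broadcast to the 7680 decoder time steps when expand=True. The sketch below shows one way such a speaker conditioning tensor can be built from an embedding lookup; the function name, the 109-speaker count and the 128-dimensional embedding are illustrative assumptions, not the GlobalConditioning implementation.

import torch
import torch.nn as nn

def global_conditioning_sketch(speaker_index, time_steps, n_speakers=109,
                               embedding_dim=128, expand=False):
    # Look up a fixed-size speaker vector and add a trailing time axis
    embedding = nn.Embedding(n_speakers, embedding_dim)
    g = embedding(speaker_index)          # (batch, embedding_dim)
    g = g.unsqueeze(-1)                   # (batch, embedding_dim, 1)
    if expand:
        g = g.expand(-1, -1, time_steps)  # broadcast along the time axis
    return g

# Shapes matching the assertions in the test above
g = global_conditioning_sketch(torch.tensor([3]), time_steps=7680)
assert g.size() == torch.Size([1, 128, 1])
g_expanded = global_conditioning_sketch(torch.tensor([3]), time_steps=7680, expand=True)
assert g_expanded.size() == torch.Size([1, 128, 7680])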
Example #4
    def _plot_merged_all_losses_type(self,
                                     all_results_paths,
                                     all_experiments_names,
                                     all_train_losses,
                                     all_train_perplexities,
                                     all_latest_epochs,
                                     colormap_name='tab20'):

        latest_epoch = all_latest_epochs[0]
        for i in range(1, len(all_latest_epochs)):
            if all_latest_epochs[i] != latest_epoch:
                raise ValueError(
                    'All experiments must have the same number of epochs to merge them'
                )

        results_path = all_results_paths[0]

        all_train_losses_smooth = dict()
        for i in range(len(all_train_losses)):
            for loss_name in all_train_losses[i].keys():
                if loss_name == 'loss':
                    continue
                if loss_name not in all_train_losses_smooth:
                    all_train_losses_smooth[loss_name] = list()
                all_train_losses_smooth[loss_name].append(
                    self._smooth_curve(all_train_losses[i][loss_name]))

        for loss_name in all_train_losses_smooth.keys():
            n_colors = len(all_train_losses_smooth[loss_name])
            colors = self._get_colors_from_cmap(colormap_name, n_colors)

            train_losses_smooth = all_train_losses_smooth[loss_name]
            all_train_loss_smooth = np.asarray(train_losses_smooth)
            all_train_loss_smooth = np.reshape(
                all_train_loss_smooth,
                (n_colors, latest_epoch,
                 all_train_loss_smooth.shape[1] // latest_epoch))

            fig, ax = plt.subplots(figsize=(8, 8))

            for j in range(len(all_train_loss_smooth)):
                ax = self._plot_fill_between(ax, colors[j],
                                             all_train_loss_smooth[j],
                                             all_experiments_names[j])
            ax = self._configure_ax(ax,
                                    title='Smoothed ' +
                                    loss_name.replace('_', ' '),
                                    xlabel='Epochs',
                                    ylabel='Loss',
                                    legend=True)
            output_plot_path = results_path + os.sep + loss_name + '.png'

            fig.savefig(output_plot_path)
            plt.close(fig)

            ConsoleLogger.success(
                "Saved figure at path '{}'".format(output_plot_path))
Example #5
    def _plot_loss_and_perplexity_figures(self, all_results_paths,
                                          all_experiments_names,
                                          all_train_losses,
                                          all_train_perplexities,
                                          all_latest_epochs, n_colors, colors):

        for i in range(len(all_experiments_names)):
            results_path = all_results_paths[i]
            experiment_name = all_experiments_names[i]
            output_plot_path = results_path + os.sep + experiment_name + '_loss-and-perplexity.png'

            train_loss_smooth = self._smooth_curve(all_train_losses[i]['loss'])
            train_perplexity_smooth = self._smooth_curve(
                all_train_perplexities[i])

            latest_epoch = all_latest_epochs[i]

            train_loss_smooth = np.asarray(train_loss_smooth)
            train_perplexity_smooth = np.asarray(train_perplexity_smooth)
            train_loss_smooth = np.reshape(
                train_loss_smooth,
                (latest_epoch, train_loss_smooth.shape[0] // latest_epoch))
            train_perplexity_smooth = np.reshape(
                train_perplexity_smooth,
                (latest_epoch,
                 train_perplexity_smooth.shape[0] // latest_epoch))

            fig = plt.figure(figsize=(16, 8))

            ax = fig.add_subplot(1, 2, 1)
            ax = self._plot_fill_between(ax, colors[i], train_loss_smooth,
                                         all_experiments_names[i])
            ax = self._configure_ax(ax,
                                    title='Smoothed loss',
                                    xlabel='Epochs',
                                    ylabel='Loss',
                                    legend=False)

            ax = fig.add_subplot(1, 2, 2)
            ax = self._plot_fill_between(ax, colors[i],
                                         train_perplexity_smooth,
                                         all_experiments_names[i])
            ax = self._configure_ax(ax,
                                    title='Smoothed average codebook usage',
                                    xlabel='Epochs',
                                    ylabel='Perplexity',
                                    legend=False)

            fig.savefig(output_plot_path)
            plt.close(fig)

            ConsoleLogger.success(
                "Saved figure at path '{}'".format(output_plot_path))
Example #6
    def compute_groundtruth_average_phonemes_number(self):
        alignments_dic = None
        with open(
                self._results_path + os.sep +
                'vctk_groundtruth_alignments.pickle', 'rb') as f:
            alignments_dic = pickle.load(f)

        extended_alignment_dataset = alignments_dic[
            'extended_alignment_dataset']

        phonemes_number = list()
        for _, alignment in extended_alignment_dataset:
            phonemes_number.append(len(np.unique(alignment)))
        ConsoleLogger.success(
            'The average number of phonemes per alignment for {} alignments is: {}'
            .format(len(extended_alignment_dataset),
                    round(np.mean(phonemes_number), 2)))
Example #7
    def export_to_features(self, vctk_path, configuration):
        if not os.path.isdir(vctk_path):
            raise ValueError(
                "VCTK dataset not found at path '{}'".format(vctk_path))

        # Create the features path directory if it doesn't exist
        features_path = vctk_path + os.sep + configuration['features_path']
        if not os.path.isdir(features_path):
            ConsoleLogger.status(
                'Creating features directory at path: {}'.format(
                    features_path))
            os.mkdir(features_path)
        else:
            ConsoleLogger.status(
                'Features directory already created at path: {}'.format(
                    features_path))

        # Create the train features directory if it doesn't exist
        train_features_path = features_path + os.sep + 'train'
        if not os.path.isdir(train_features_path):
            ConsoleLogger.status(
                'Creating train features directory at path: {}'.format(
                    train_features_path))
            os.mkdir(train_features_path)
        else:
            ConsoleLogger.status(
                'Train features directory already created at path: {}'.format(
                    train_features_path))

        # Create the val features directory if it doesn't exist
        val_features_path = features_path + os.sep + 'val'
        if not os.path.isdir(val_features_path):
            ConsoleLogger.status(
                'Creating val features directory at path: {}'.format(
                    val_features_path))
            os.mkdir(val_features_path)
        else:
            ConsoleLogger.status(
                'Val features directory already created at path: {}'.format(
                    val_features_path))

        def process(loader, output_dir, input_features_name,
                    output_features_name, rate, input_filters_number,
                    output_filters_number, input_target_shape,
                    augment_output_features, export_one_hot_features):

            initial_index = 0
            attempts = 10
            current_attempt = 0
            total_length = len(loader)

            while current_attempt < attempts:
                try:
                    i = initial_index
                    bar = tqdm(loader, initial=initial_index)
                    for data in bar:
                        (preprocessed_audio, one_hot, speaker_id, quantized,
                         wav_filename, sampling_rate, shifting_time,
                         random_starting_index, preprocessed_length,
                         top_db) = data

                        output_path = output_dir + os.sep + str(i) + '.pickle'
                        if os.path.isfile(output_path):
                            if os.path.getsize(output_path) == 0:
                                bar.set_description(
                                    '{} already exists but is empty. Computing it again...'
                                    .format(output_path))
                                os.remove(output_path)
                            else:
                                bar.set_description(
                                    '{} already exists'.format(output_path))
                            i += 1
                            continue

                        input_features = SpeechFeatures.features_from_name(
                            name=input_features_name,
                            signal=preprocessed_audio,
                            rate=rate,
                            filters_number=input_filters_number)

                        if (input_features.shape[0] != input_target_shape[0]
                                or input_features.shape[1] != input_target_shape[1]):
                            ConsoleLogger.warn(
                                "Raw features number {} with invalid dimension {} will not be saved. Target shape: {}"
                                .format(i, input_features.shape,
                                        input_target_shape))
                            i += 1
                            continue

                        output_features = SpeechFeatures.features_from_name(
                            name=output_features_name,
                            signal=preprocessed_audio,
                            rate=rate,
                            filters_number=output_filters_number,
                            augmented=augment_output_features)

                        # TODO: add an option in configuration to save quantized/one_hot or not
                        output = {
                            'preprocessed_audio': preprocessed_audio,
                            'wav_filename': wav_filename,
                            'input_features': input_features,
                            'one_hot': one_hot if export_one_hot_features else np.array([]),
                            'quantized': np.array([]),
                            'speaker_id': speaker_id,
                            'output_features': output_features,
                            'shifting_time': shifting_time,
                            'random_starting_index': random_starting_index,
                            'preprocessed_length': preprocessed_length,
                            'sampling_rate': sampling_rate,
                            'top_db': top_db
                        }

                        with open(output_path, 'wb') as file:
                            pickle.dump(output, file)

                        bar.set_description('{} saved'.format(output_path))

                        i += 1

                        if i == total_length:
                            bar.update(total_length)
                            break

                    bar.close()
                    break
                except KeyboardInterrupt:
                    bar.close()
                    ConsoleLogger.warn(
                        'Keyboard interrupt detected. Leaving the function...')
                    return
                except:
                    error_message = 'An error occurred in the data loader at {}/{}. Current attempt: {}/{}'.format(
                        output_dir, i, current_attempt + 1, attempts)
                    self._logger.exception(error_message)
                    ConsoleLogger.error(error_message)
                    initial_index = i
                    current_attempt += 1
                    continue

        try:
            ConsoleLogger.status('Processing training part')
            process(
                loader=self._training_loader,
                output_dir=train_features_path,
                input_features_name=configuration['input_features_type'],
                output_features_name=configuration['output_features_type'],
                rate=configuration['sampling_rate'],
                input_filters_number=configuration['input_features_filters'],
                output_filters_number=configuration['output_features_filters'],
                input_target_shape=(configuration['input_features_dim'],
                                    configuration['input_features_filters'] *
                                    3),
                augment_output_features=configuration[
                    'augment_output_features'],
                export_one_hot_features=configuration[
                    'export_one_hot_features'])
            ConsoleLogger.success('Training part processed')
        except:
            ConsoleLogger.error(
                'An error occurred during training features generation')

        try:
            ConsoleLogger.status('Processing validation part')
            process(
                loader=self._validation_loader,
                output_dir=val_features_path,
                input_features_name=configuration['input_features_type'],
                output_features_name=configuration['output_features_type'],
                rate=configuration['sampling_rate'],
                input_filters_number=configuration['input_features_filters'],
                output_filters_number=configuration['output_features_filters'],
                input_target_shape=(configuration['input_features_dim'],
                                    configuration['input_features_filters'] *
                                    3),
                augment_output_features=configuration[
                    'augment_output_features'],
                export_one_hot_features=configuration[
                    'export_one_hot_features'])
            ConsoleLogger.success('Validation part processed')
        except:
            ConsoleLogger.error(
                'An error occurred during validation features generation')
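A quick way to sanity-check the export is to read one of the generated files back. The dictionary keys below mirror the output built in process(); the path is only an example of the <features_path>/train/<index>.pickle layout used above.

import pickle

# Illustrative path following the export layout above
with open('features/train/0.pickle', 'rb') as f:
    sample = pickle.load(f)

print(sample['wav_filename'], sample['sampling_rate'], sample['top_db'])
print(sample['input_features'].shape, sample['output_features'].shape)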
Example #8
    sys.exit(0)
    if args.fetch:
        Experiments.load(args.experiments_configuration_path).fetch()
        sys.exit(0)
    if args.export_to_features:
        configuration = load_configuration(default_configuration_path)
        configuration = update_configuration_from_experiments(
            args.experiments_configuration_path, configuration)
        device_configuration = DeviceConfiguration.load_from_configuration(
            configuration)
        data_stream = VCTKSpeechStream(configuration,
                                       device_configuration.gpu_ids,
                                       device_configuration.use_cuda)
        data_stream.export_to_features(default_dataset_path, configuration)
        ConsoleLogger.success(
            "VCTK exported to a new features dataset at: '{}'".format(
                default_dataset_path + os.sep +
                configuration['features_path']))
        sys.exit(0)

    if args.evaluate:
        Experiments.load(
            args.experiments_configuration_path).evaluate(evaluation_options)
        ConsoleLogger.success('All evaluating experiments done')
        sys.exit(0)

    if args.compute_dataset_stats:
        configuration = load_configuration(default_configuration_path)
        configuration = update_configuration_from_experiments(
            args.experiments_configuration_path, configuration)
        device_configuration = DeviceConfiguration.load_from_configuration(
            configuration)
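This dispatch code expects an args namespace whose attributes match the branches above. A hedged sketch of a parser that would produce it follows; the flag spellings, defaults and help texts are assumptions, not the project's actual entry point.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--experiments_configuration_path', type=str,
                    help='Path to the experiments configuration file')
parser.add_argument('--fetch', action='store_true')
parser.add_argument('--export_to_features', action='store_true')
parser.add_argument('--evaluate', action='store_true')
parser.add_argument('--compute_dataset_stats', action='store_true')
args = parser.parse_args()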
Example #9
    fig, ax = plt.subplots()
    ax.plot(np.arange(N), sil_duration_gaps)
    ax.set_title(
        'Silence duration gap between Montreal alignments and\nlibrosa loading with sil thresh at 20 dB'
    )
    ax.axhline(y=mean_sil_duration_gaps, xmin=0.0, xmax=1.0, color='r')

    yt = ax.get_yticks()
    yt = np.append(yt, mean_sil_duration_gaps)
    ax.set_yticks(yt)

    ax.set_ylabel('Time (s)')
    ax.set_xlabel('Number of audio samples')
    ax.set_ylim(bottom=0)
    fig.savefig('../results/sil_duration_gaps.png')
    plt.close(fig)

    ConsoleLogger.success(
        'mean sil duration gap: {}'.format(mean_sil_duration_gaps))

    with open('../results/sil_duration_gap_stats.pickle', 'wb') as file:
        pickle.dump(
            {
                'sil_duration_gaps': sil_duration_gaps,
                'audio_filenames': audio_filenames,
                'original_shifting_times': original_shifting_times,
                'beginning_trimmed_times': beginning_trimmed_times,
                'detected_sil_durations': detected_sil_durations,
                'mean_sil_duration_gaps': mean_sil_duration_gaps
            }, file)
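As a hedged illustration of how the silence duration gap above could be measured for a single file: compare the leading audio trimmed by librosa at top_db=20 with a reference start time (for instance from the Montreal Forced Aligner). The file path and the reference value below are placeholders.

import librosa

y, sr = librosa.load('p225_001.wav', sr=None)
_, (start, end) = librosa.effects.trim(y, top_db=20)
beginning_trimmed_time = start / sr   # seconds of leading audio trimmed by librosa
aligner_start_time = 0.32             # hypothetical start time taken from the aligner
sil_duration_gap = abs(beginning_trimmed_time - aligner_start_time)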
Example #10
    def compute_clustering_metrics(self):
        groundtruth_alignments_dic = None
        with open(
                self._results_path + os.sep +
                'vctk_groundtruth_alignments.pickle', 'rb') as f:
            groundtruth_alignments_dic = pickle.load(f)

        empirical_alignments_dic = None
        with open(
                self._results_path + os.sep + self._experiment_name +
                '_vctk_empirical_alignments.pickle', 'rb') as f:
            empirical_alignments_dic = pickle.load(f)

        groundtruth_alignments = np.array(
            groundtruth_alignments_dic['extended_alignment_dataset'])
        possible_phonemes = list(
            groundtruth_alignments_dic['possible_phonemes'])
        empirical_alignments = np.array(
            empirical_alignments_dic['all_alignments'])
        phonemes_indices = {
            possible_phonemes[i]: i
            for i in range(len(possible_phonemes))
        }

        ConsoleLogger.status('#{} possible phonemes: {}'.format(
            len(possible_phonemes), possible_phonemes))
        ConsoleLogger.status('# of raw groundtruth alignments: {}'.format(
            len(groundtruth_alignments)))
        ConsoleLogger.status('# of raw empirical alignments: {}'.format(
            len(empirical_alignments)))

        groundtruth_utterance_keys = set()
        final_groundtruth_alignments = list()
        final_empirical_alignments = list()

        alignment_length = ((self._configuration['length'] /
                             self._configuration['sampling_rate']) * 100) / 2

        for (utterence_key, alignment) in groundtruth_alignments:
            if len(alignment) != alignment_length:  # FIXME
                ConsoleLogger.error(
                    'len(alignment) != alignment_length: {}'.format(
                        len(alignment)))
                continue
            groundtruth_utterance_keys.add(utterence_key)
            final_groundtruth_alignments.append([
                phonemes_indices[alignment[i]] for i in range(len(alignment))
            ])

        for (utterence_key, alignment) in empirical_alignments:
            if utterence_key in groundtruth_utterance_keys:
                final_empirical_alignments.append(alignment)

        final_groundtruth_alignments = np.asarray(final_groundtruth_alignments)
        final_empirical_alignments = np.asarray(final_empirical_alignments)

        ConsoleLogger.status('Groundtruth alignments shape: {}'.format(
            final_groundtruth_alignments.shape))
        ConsoleLogger.status('Empirical alignments shape: {}'.format(
            final_empirical_alignments.shape))

        ConsoleLogger.status('Groundtruth alignments samples: {}'.format(
            [final_groundtruth_alignments[i] for i in range(2)]))
        ConsoleLogger.status('Empirical alignments samples: {}'.format(
            [final_empirical_alignments[i] for i in range(2)]))

        concatenated_groundtruth_alignments = np.concatenate(
            final_groundtruth_alignments)
        concatenated_empirical_alignments = np.concatenate(
            final_empirical_alignments)

        ConsoleLogger.status(
            'Concatenated groundtruth alignments shape: {}'.format(
                concatenated_groundtruth_alignments.shape))
        ConsoleLogger.status(
            'Concatenated empirical alignments shape: {}'.format(
                concatenated_empirical_alignments.shape))

        adjusted_rand_score = sklearn.metrics.adjusted_rand_score(
            concatenated_groundtruth_alignments,
            concatenated_empirical_alignments)
        adjusted_mutual_info_score = sklearn.metrics.adjusted_mutual_info_score(
            concatenated_groundtruth_alignments,
            concatenated_empirical_alignments)
        normalized_mutual_info_score = sklearn.metrics.normalized_mutual_info_score(
            concatenated_groundtruth_alignments,
            concatenated_empirical_alignments)

        ConsoleLogger.success(
            'Adjusted rand score: {}'.format(adjusted_rand_score))
        ConsoleLogger.success('Adjusted mutual info score: {}'.format(
            adjusted_mutual_info_score))
        ConsoleLogger.success(
            'Normalized mutual info score: {}'.format(
                normalized_mutual_info_score))

        with open(
                self._results_path + os.sep + self._experiment_name +
                '_adjusted_rand_score.npy', 'wb') as f:
            np.save(f, adjusted_rand_score)

        with open(
                self._results_path + os.sep + self._experiment_name +
                '_adjusted_mutual_info_score.npy', 'wb') as f:
            np.save(f, adjusted_mutual_info_score)

        with open(
                self._results_path + os.sep + self._experiment_name +
                '_normalized_mutual_info_score.npy', 'wb') as f:
            np.save(f, normalized_mutual_info_score)

        ConsoleLogger.success(
            'All scores from clustering metrics were successfully saved')
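As a toy illustration of the three scores computed above, two short made-up label sequences that describe the same partition under different label ids give perfect agreement:

import sklearn.metrics

groundtruth = [0, 0, 1, 1, 2, 2]
empirical = [1, 1, 0, 0, 2, 2]  # same grouping, different label ids

print(sklearn.metrics.adjusted_rand_score(groundtruth, empirical))           # 1.0
print(sklearn.metrics.adjusted_mutual_info_score(groundtruth, empirical))    # 1.0
print(sklearn.metrics.normalized_mutual_info_score(groundtruth, empirical))  # 1.0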
Example #11
    def fetch(self):
        ConsoleLogger.status("Running the experiment called '{}'".format(self._name))
        ConsoleLogger.status('Starting to evaluate the model and save data')
        self._trainer.fetch()
        ConsoleLogger.success("Successfully ran the experiment called '{}'".format(self._name))
Example #12
    def _plot_merged_all_losses_figures(self,
                                        all_results_paths,
                                        all_experiments_names,
                                        all_train_losses,
                                        all_train_perplexities,
                                        all_latest_epochs,
                                        colormap_name='tab20'):

        latest_epoch = all_latest_epochs[0]
        for i in range(1, len(all_latest_epochs)):
            if all_latest_epochs[i] != latest_epoch:
                raise ValueError(
                    'All experiments must have the same number of epochs to merge them'
                )

        results_path = all_results_paths[0]

        all_train_losses_smooth = list()
        for i in range(len(all_train_losses)):
            train_losses_smooth = list()
            train_losses_names = list()
            for key in all_train_losses[i].keys():
                train_loss_smooth = self._smooth_curve(
                    all_train_losses[i][key])
                train_losses_smooth.append(train_loss_smooth)
                train_losses_names.append(key)
            all_train_losses_smooth.append(
                (train_losses_smooth, train_losses_names))

        for i in range(len(all_train_losses_smooth)):
            n_colors = len(all_train_losses[i])
            colors = self._get_colors_from_cmap(colormap_name, n_colors)

            (train_losses_smooth,
             train_losses_names) = all_train_losses_smooth[i]
            all_train_loss_smooth = np.asarray(train_losses_smooth)
            all_train_loss_smooth = np.reshape(
                all_train_loss_smooth,
                (n_colors, latest_epoch,
                 all_train_loss_smooth.shape[1] // latest_epoch))

            fig, ax = plt.subplots(figsize=(8, 8))

            for j in range(len(all_train_loss_smooth)):
                ax = self._plot_fill_between(ax, colors[j],
                                             all_train_loss_smooth[j],
                                             train_losses_names[j])
            experiment_name = all_experiments_names[i]
            ax = self._configure_ax(ax,
                                    title='Smoothed losses of ' +
                                    experiment_name,
                                    xlabel='Epochs',
                                    ylabel='Loss',
                                    legend=True)
            output_plot_path = results_path + os.sep + experiment_name + '_merged-losses.png'

            fig.savefig(output_plot_path)
            plt.close(fig)

            ConsoleLogger.success(
                "Saved figure at path '{}'".format(output_plot_path))
Example #13
    def _plot_merged_losses_and_perplexities_figure(
            self, all_results_paths, all_experiments_names, all_train_losses,
            all_train_perplexities, all_latest_epochs, n_colors, colors):

        latest_epoch = all_latest_epochs[0]
        for i in range(1, len(all_latest_epochs)):
            if all_latest_epochs[i] != latest_epoch:
                raise ValueError(
                    'All experiments must have the same number of epochs to merge them'
                )

        results_path = all_results_paths[0]
        experiment_name = 'merged-loss-and-perplexity'
        output_plot_path = results_path + os.sep + experiment_name + '.png'

        all_train_loss_smooth = list()
        all_train_perplexity_smooth = list()
        for i in range(len(all_train_perplexities)):
            train_loss_smooth = self._smooth_curve(all_train_losses[i]['loss'])
            train_perplexity_smooth = self._smooth_curve(
                all_train_perplexities[i])
            all_train_loss_smooth.append(train_loss_smooth)
            all_train_perplexity_smooth.append(train_perplexity_smooth)

        all_train_loss_smooth = np.asarray(all_train_loss_smooth)
        all_train_perplexity_smooth = np.asarray(all_train_perplexity_smooth)
        all_train_loss_smooth = np.reshape(
            all_train_loss_smooth,
            (n_colors, latest_epoch,
             all_train_loss_smooth.shape[1] // latest_epoch))
        all_train_perplexity_smooth = np.reshape(
            all_train_perplexity_smooth,
            (n_colors, latest_epoch,
             all_train_perplexity_smooth.shape[1] // latest_epoch))

        fig = plt.figure(figsize=(16, 8))

        ax = fig.add_subplot(1, 2, 1)
        for i in range(len(all_train_loss_smooth)):
            ax = self._plot_fill_between(ax, colors[i],
                                         all_train_loss_smooth[i],
                                         all_experiments_names[i])
        ax = self._configure_ax(ax,
                                title='Smoothed loss',
                                xlabel='Epochs',
                                ylabel='Loss',
                                legend=True)

        ax = fig.add_subplot(1, 2, 2)
        for i in range(len(all_train_perplexity_smooth)):
            ax = self._plot_fill_between(ax, colors[i],
                                         all_train_perplexity_smooth[i],
                                         all_experiments_names[i])
        ax = self._configure_ax(ax,
                                title='Smoothed average codebook usage',
                                xlabel='Epochs',
                                ylabel='Perplexity',
                                legend=True)

        fig.savefig(output_plot_path)
        plt.close(fig)

        ConsoleLogger.success(
            "Saved figure at path '{}'".format(output_plot_path))