    def compute_dataset_stats(self):
        attempts = 10
        current_attempt = 0
        total_length = len(self._training_loader)
        train_mfccs = list()
        while current_attempt < attempts:
            try:
                # Restart from scratch on each attempt: the data loader cannot be
                # resumed mid-iteration, and keeping entries from a failed attempt
                # would duplicate them in train_mfccs
                i = 0
                train_mfccs = list()
                train_bar = tqdm(self._training_loader)
                for data in train_bar:
                    input_features = data['input_features']
                    # Drop the batch dimension (batch size is assumed to be 1) and
                    # convert to a (time, features) numpy array
                    train_mfccs.append(input_features.detach().view(
                        input_features.size(1), input_features.size(2)).numpy())

                    i += 1

                    if i == total_length:
                        train_bar.update(total_length)
                        break

                train_bar.close()
                break

            except KeyboardInterrupt:
                train_bar.close()
                ConsoleLogger.warn('Keyboard interrupt detected. Leaving the function...')
                return
            except Exception:
                error_message = 'An error occurred in the data loader at index {}. Current attempt: {}/{}'.format(
                    i, current_attempt + 1, attempts)
                self._logger.exception(error_message)
                ConsoleLogger.error(error_message)
                current_attempt += 1
                continue


        ConsoleLogger.status('Computing the mean of the training set MFCCs...')
        concatenated_mfccs = np.concatenate(train_mfccs)
        train_mean = concatenated_mfccs.mean(axis=0)

        ConsoleLogger.status('Computing the std of the training set MFCCs...')
        train_std = concatenated_mfccs.std(axis=0)

        stats = {
            'train_mean': train_mean,
            'train_std': train_std
        }

        ConsoleLogger.status('Writing stats to file...')
        with open(self._normalizer_path, 'wb') as file: # TODO: do not use hardcoded path
            pickle.dump(stats, file)

        # Normalize the first training sample as a quick visual sanity check
        train_mfccs_norm = (train_mfccs[0] - train_mean) / train_std

        ConsoleLogger.status('Computing example plot...')
        _, axs = plt.subplots(2, sharex=True)
        axs[0].imshow(train_mfccs[0].T, aspect='auto', origin='lower')
        axs[0].set_ylabel('Unnormalized')
        axs[1].imshow(train_mfccs_norm.T, aspect='auto', origin='lower')
        axs[1].set_ylabel('Normalized')
        plt.savefig('mfcc_normalization_comparison.png') # TODO: do not use hardcoded path
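
    # Usage sketch: downstream code can reload the saved stats to normalize new
    # features (normalizer_path and mfcc are hypothetical stand-ins):
    #
    #     with open(normalizer_path, 'rb') as file:
    #         stats = pickle.load(file)
    #     mfcc_norm = (mfcc - stats['train_mean']) / stats['train_std']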
Example #2
    def plot_training_losses(self, experiments, experiments_path):
        all_train_losses = list()
        all_train_perplexities = list()
        all_results_paths = list()
        all_experiments_names = list()
        all_latest_epochs = list()

        for experiment in experiments:
            try:
                train_res_losses, train_res_perplexities, latest_epoch = \
                    CheckpointUtils.retreive_losses_values(experiments_path, experiment)
                all_train_losses.append(train_res_losses)
                all_train_perplexities.append(train_res_perplexities)
                all_results_paths.append(experiment.results_path)
                all_experiments_names.append(experiment.name)
                all_latest_epochs.append(latest_epoch)
            except Exception:
                ConsoleLogger.error(
                    "Failed to retrieve losses of experiment '{}'".format(
                        experiment.name))

        n_final_losses_colors = len(all_train_losses)
        final_losses_colors = self._get_colors_from_cmap(
            self._colormap_name, n_final_losses_colors)

        # for each experiment: final loss + perplexity
        self._plot_loss_and_perplexity_figures(
            all_results_paths, all_experiments_names, all_train_losses,
            all_train_perplexities, all_latest_epochs, n_final_losses_colors,
            final_losses_colors)

        # merged experiment: merged final losses + merged perplexities
        self._plot_merged_losses_and_perplexities_figure(
            all_results_paths, all_experiments_names, all_train_losses,
            all_train_perplexities, all_latest_epochs, n_final_losses_colors,
            final_losses_colors)

        # for each experiment: all possible losses
        self._plot_merged_all_losses_figures(all_results_paths,
                                             all_experiments_names,
                                             all_train_losses,
                                             all_train_perplexities,
                                             all_latest_epochs)

        # merged losses of a single type in all experiments
        self._plot_merged_all_losses_type(all_results_paths,
                                          all_experiments_names,
                                          all_train_losses,
                                          all_train_perplexities,
                                          all_latest_epochs)
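
    # Usage sketch (hypothetical caller; experiments and experiments_path come from
    # the surrounding experiment-management code):
    #
    #     plotter.plot_training_losses(experiments, experiments_path)

Example #3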
    def load(experiments_path,
             experiment_name,
             results_path,
             data_path='../data'):
        error_caught = False
        configuration_file, checkpoint_files = None, []

        try:
            configuration_file, checkpoint_files = PipelineFactory.load_configuration_and_checkpoints(
                experiments_path, experiment_name)
        except Exception:
            ConsoleLogger.error(
                'Failed to load existing configuration. Building a new model...'
            )
            error_caught = True

        # Load the configuration file
        ConsoleLogger.status('Loading the configuration file')
        configuration = None
        if configuration_file is not None:
            with open(experiments_path + os.sep + configuration_file, 'r') as file:
                configuration = yaml.load(file, Loader=yaml.FullLoader)
        # NOTE: configuration may still be None here if loading failed;
        # DeviceConfiguration and PipelineFactory.build are assumed to fall back to defaults
        device_configuration = DeviceConfiguration.load_from_configuration(
            configuration)

        if error_caught or len(checkpoint_files) == 0:
            trainer, evaluator = PipelineFactory.build(configuration,
                                                       device_configuration,
                                                       experiments_path,
                                                       experiment_name,
                                                       results_path)
        else:
            latest_checkpoint_file, latest_epoch = CheckpointUtils.search_latest_checkpoint_file(
                checkpoint_files)
            # Update the epoch number to begin with for the future training
            configuration['start_epoch'] = latest_epoch
            configuration['num_epochs'] = 60  # TODO: do not hardcode the number of epochs
            # Load the checkpoint file
            checkpoint_path = experiments_path + os.sep + latest_checkpoint_file
            ConsoleLogger.status(
                "Loading the checkpoint file '{}'".format(checkpoint_path))
            checkpoint = torch.load(checkpoint_path,
                                    map_location=device_configuration.device)

            # Load the data stream
            ConsoleLogger.status('Loading the data stream')
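            # TODO: do not use hardcoded dataset path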
            data_stream = VCTKFeaturesStream('/atlas/u/xuyilun/vctk',
                                             configuration,
                                             device_configuration.gpu_ids,
                                             device_configuration.use_cuda)

            def load_state_dicts(model, checkpoint, model_name,
                                 optimizer_name):
                # Load the state dict from the checkpoint to the model
                model.load_state_dict(checkpoint[model_name])
                # Create an Adam optimizer using the model parameters
                optimizer = optim.Adam(model.parameters())
                # Load the state dict from the checkpoint to the optimizer
                optimizer.load_state_dict(checkpoint[optimizer_name])
                # Map the optimizer memory into the specified device
                for state in optimizer.state.values():
                    for k, v in state.items():
                        if isinstance(v, torch.Tensor):
                            state[k] = v.to(device_configuration.device)
                return model, optimizer

            # If the decoder is deconvolutional, build the convolutional VQ-VAE
            if configuration['decoder_type'] == 'deconvolutional':
                # Create the model and map it to the specified device
                vqvae_model = ConvolutionalVQVAE(
                    configuration, device_configuration.device).to(
                        device_configuration.device)
                evaluator = Evaluator(device_configuration.device, vqvae_model,
                                      data_stream, configuration, results_path,
                                      experiment_name)

                # Load the model and optimizer state dicts
                vqvae_model, vqvae_optimizer = load_state_dicts(
                    vqvae_model, checkpoint, 'model', 'optimizer')
            elif configuration['decoder_type'] == 'wavenet':
                vqvae_model = WaveNetVQVAE(configuration,
                                           data_stream.speaker_dic,
                                           device_configuration.device).to(
                                               device_configuration.device)
                evaluator = Evaluator(device_configuration.device, vqvae_model,
                                      data_stream, configuration, results_path,
                                      experiment_name)
                # Load the model and optimizer state dicts
                vqvae_model, vqvae_optimizer = load_state_dicts(
                    vqvae_model, checkpoint, 'model', 'optimizer')
            else:
                raise NotImplementedError(
                    "Decoder type '{}' isn't implemented for now".format(
                        configuration['decoder_type']))

            # Temporary backward compatibility
            if 'trainer_type' not in configuration:
                ConsoleLogger.error(
                    "trainer_type was not found in the configuration file. Using 'convolutional' by default."
                )
                configuration['trainer_type'] = 'convolutional'

            if configuration['trainer_type'] == 'convolutional':
                trainer = ConvolutionalTrainer(
                    device_configuration.device, data_stream, configuration,
                    experiments_path, experiment_name, **{
                        'model': vqvae_model,
                        'optimizer': vqvae_optimizer
                    })
            else:
                raise NotImplementedError(
                    "Trainer type '{}' isn't implemented for now".format(
                        configuration['trainer_type']))


        return trainer, evaluator, configuration, device_configuration
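
    # Usage sketch (hypothetical values; load appears to be a static method of
    # PipelineFactory, judging by the calls above):
    #
    #     trainer, evaluator, configuration, device_configuration = PipelineFactory.load(
    #         '../experiments', 'baseline', '../results')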
Example #4
    def export_to_features(self, vctk_path, configuration):
        if not os.path.isdir(vctk_path):
            raise ValueError(
                "VCTK dataset not found at path '{}'".format(vctk_path))

        # Create the features path directory if it doesn't exist
        features_path = vctk_path + os.sep + configuration['features_path']
        if not os.path.isdir(features_path):
            ConsoleLogger.status(
                'Creating features directory at path: {}'.format(
                    features_path))
            os.mkdir(features_path)
        else:
            ConsoleLogger.status(
                'Features directory already created at path: {}'.format(
                    features_path))

        # Create the train features directory if it doesn't exist
        train_features_path = features_path + os.sep + 'train'
        if not os.path.isdir(train_features_path):
            ConsoleLogger.status(
                'Creating train features directory at path: {}'.format(
                    train_features_path))
            os.mkdir(train_features_path)
        else:
            ConsoleLogger.status(
                'Train features directory already created at path: {}'.format(
                    train_features_path))

        # Create the val features directory if it doesn't exist
        val_features_path = features_path + os.sep + 'val'
        if not os.path.isdir(val_features_path):
            ConsoleLogger.status(
                'Creating val features directory at path: {}'.format(
                    val_features_path))
            os.mkdir(val_features_path)
        else:
            ConsoleLogger.status(
                'Val features directory already created at path: {}'.format(
                    val_features_path))

        def process(loader, output_dir, input_features_name,
                    output_features_name, rate, input_filters_number,
                    output_filters_number, input_target_shape,
                    augment_output_features, export_one_hot_features):

            attempts = 10
            current_attempt = 0
            total_length = len(loader)

            while current_attempt < attempts:
                try:
                    # Restart from the beginning on each attempt; samples that have
                    # already been exported are skipped by the existence check below
                    i = 0
                    bar = tqdm(loader)
                    for data in bar:
                        (preprocessed_audio, one_hot, speaker_id, quantized,
                         wav_filename, sampling_rate, shifting_time,
                         random_starting_index, preprocessed_length,
                         top_db) = data

                        output_path = output_dir + os.sep + str(i) + '.pickle'
                        if os.path.isfile(output_path):
                            if os.path.getsize(output_path) == 0:
                                bar.set_description(
                                    '{} already exists but is empty. Computing it again...'
                                    .format(output_path))
                                os.remove(output_path)
                            else:
                                bar.set_description(
                                    '{} already exists'.format(output_path))
                            i += 1
                            continue

                        input_features = SpeechFeatures.features_from_name(
                            name=input_features_name,
                            signal=preprocessed_audio,
                            rate=rate,
                            filters_number=input_filters_number)

                        if (input_features.shape[0] != input_target_shape[0]
                                or input_features.shape[1] != input_target_shape[1]):
                            ConsoleLogger.warn(
                                "Raw features entry {} with invalid shape {} will not be saved. Target shape: {}"
                                .format(i, input_features.shape,
                                        input_target_shape))
                            i += 1
                            continue

                        output_features = SpeechFeatures.features_from_name(
                            name=output_features_name,
                            signal=preprocessed_audio,
                            rate=rate,
                            filters_number=output_filters_number,
                            augmented=augment_output_features)

                        # TODO: add an option in configuration to save quantized/one_hot or not
                        output = {
                            'preprocessed_audio': preprocessed_audio,
                            'wav_filename': wav_filename,
                            'input_features': input_features,
                            'one_hot': one_hot if export_one_hot_features else np.array([]),
                            'quantized': np.array([]),
                            'speaker_id': speaker_id,
                            'output_features': output_features,
                            'shifting_time': shifting_time,
                            'random_starting_index': random_starting_index,
                            'preprocessed_length': preprocessed_length,
                            'sampling_rate': sampling_rate,
                            'top_db': top_db
                        }

                        with open(output_path, 'wb') as file:
                            pickle.dump(output, file)

                        bar.set_description('{} saved'.format(output_path))

                        i += 1

                        if i == total_length:
                            bar.update(total_length)
                            break

                    bar.close()
                    break
                except KeyboardInterrupt:
                    bar.close()
                    ConsoleLogger.warn(
                        'Keyboard interrupt detected. Leaving the function...')
                    return
                except Exception:
                    error_message = 'An error occurred in the data loader at {}/{}. Current attempt: {}/{}'.format(
                        output_dir, i, current_attempt + 1, attempts)
                    self._logger.exception(error_message)
                    ConsoleLogger.error(error_message)
                    current_attempt += 1
                    continue

        try:
            ConsoleLogger.status('Processing training part')
            process(
                loader=self._training_loader,
                output_dir=train_features_path,
                input_features_name=configuration['input_features_type'],
                output_features_name=configuration['output_features_type'],
                rate=configuration['sampling_rate'],
                input_filters_number=configuration['input_features_filters'],
                output_filters_number=configuration['output_features_filters'],
                input_target_shape=(configuration['input_features_dim'],
                                    configuration['input_features_filters'] *
                                    3),
                augment_output_features=configuration[
                    'augment_output_features'],
                export_one_hot_features=configuration[
                    'export_one_hot_features'])
            ConsoleLogger.success('Training part processed')
        except Exception:
            ConsoleLogger.error(
                'An error occurred during training features generation')

        try:
            ConsoleLogger.status('Processing validation part')
            process(
                loader=self._validation_loader,
                output_dir=val_features_path,
                input_features_name=configuration['input_features_type'],
                output_features_name=configuration['output_features_type'],
                rate=configuration['sampling_rate'],
                input_filters_number=configuration['input_features_filters'],
                output_filters_number=configuration['output_features_filters'],
                input_target_shape=(configuration['input_features_dim'],
                                    configuration['input_features_filters'] *
                                    3),
                augment_output_features=configuration[
                    'augment_output_features'],
                export_one_hot_features=configuration[
                    'export_one_hot_features'])
            ConsoleLogger.success('Validation part processed')
        except Exception:
            ConsoleLogger.error(
                'An error occurred during validation features generation')
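
    # Usage sketch (hypothetical instance of the class that owns this method):
    #
    #     dataset.export_to_features('../data/vctk', configuration)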
Example #6
    def compute_groundtruth_alignments(self):
        ConsoleLogger.status(
            'Computing groundtruth alignments of VCTK val dataset...')

        desired_time_interval = 0.02
        extended_alignment_dataset = list()
        possible_phonemes = set()
        phonemes_counter = dict()
        total_phonemes_apparations = 0
        # Duration of each sample in seconds
        data_length = self._configuration['length'] / self._configuration['sampling_rate']

        with tqdm(self._data_stream.validation_loader) as bar:
            for data in bar:
                speaker_ids = data['speaker_id'].to(self._device)
                wav_filenames = data['wav_filename']
                shifting_times = data['shifting_time'].to(self._device)
                loader_indices = data['index'].to(self._device)

                speaker_id = wav_filenames[0][0].split('/')[-2]  # penultimate path component is the speaker id
                if speaker_id not in os.listdir(self._vctk.raw_folder +
                                                os.sep + 'VCTK-Corpus' +
                                                os.sep + 'phonemes'):
                    # TODO: log the missing folders
                    continue

                for i in range(len(shifting_times)):
                    wav_filename = wav_filenames[0][i]
                    utterence_key = wav_filename.split('/')[-1].replace(
                        '.wav', '')
                    phonemes_alignment_path = os.sep.join(wav_filename.split('/')[:-3]) \
                        + os.sep + 'phonemes' + os.sep + utterence_key.split('_')[0] \
                        + os.sep + utterence_key + '.TextGrid'
                    if not os.path.isfile(phonemes_alignment_path):
                        # TODO: log this warn instead of print it
                        #ConsoleLogger.warn('File {} not found'.format(phonemes_alignment_path))
                        break

                    shifting_time = shifting_times[0].detach().cpu().item()
                    target_time_scale = np.arange(
                        (data_length / desired_time_interval) +
                        1) * desired_time_interval + shifting_time
                    shifted_indices = np.where(
                        target_time_scale >= shifting_time)
                    tg = textgrid.TextGrid()
                    tg.read(phonemes_alignment_path)
                    """if target_time_scale[-1] > tg.tiers[1][-1].maxTime:
                        ConsoleLogger.error('Shifting time error at {}.pickle: shifting_time:{}' \
                            ' target_time_scale[-1]:{} > tg.tiers[1][-1].maxTime:{}'.format(
                            loader_indices[i].detach().cpu().item(),
                            shifting_time,
                            target_time_scale[-1],
                            tg.tiers[1][-1].maxTime))
                        continue"""

                    phonemes = list()
                    current_target_time_index = 0
                    for interval in tg.tiers[1]:
                        if interval.mark in ['', '-', "'"]:
                            if interval == tg.tiers[1][-1] and len(
                                    phonemes) != int(
                                        data_length / desired_time_interval):
                                previous_interval = tg.tiers[1][-2]
                                ConsoleLogger.warn(
                                    "{}/{} phonemes aligned. Appending the last valid phoneme '{}' to reach the expected count.\n"
                                    "Sanity checks to find the possible cause:\n"
                                    "current_target_time_index < (data_length / desired_time_interval): {}\n"
                                    "target_time_scale[current_target_time_index] >= interval.minTime: {}\n"
                                    "target_time_scale[current_target_time_index] <= interval.maxTime: {}"
                                    .format(
                                        len(phonemes),
                                        int(data_length /
                                            desired_time_interval),
                                        previous_interval.mark,
                                        current_target_time_index <
                                        (data_length / desired_time_interval),
                                        target_time_scale[
                                            current_target_time_index] >=
                                        previous_interval.minTime,
                                        target_time_scale[
                                            current_target_time_index] <=
                                        previous_interval.maxTime))
                                phonemes.append(previous_interval.mark)
                            continue
                        interval.minTime = float(interval.minTime)
                        interval.maxTime = float(interval.maxTime)
                        if interval.maxTime < shifting_time:
                            continue
                        # Strip a trailing stress digit (e.g. 'AH0' -> 'AH') from the mark
                        if interval.mark[-1].isdigit():
                            interval.mark = interval.mark[:-1]
                        possible_phonemes.add(interval.mark)
                        if interval.mark not in phonemes_counter:
                            phonemes_counter[interval.mark] = 0
                        phonemes_counter[interval.mark] += 1
                        total_phonemes_apparations += 1
                        while current_target_time_index < (data_length / desired_time_interval) and \
                            target_time_scale[current_target_time_index] >= interval.minTime and \
                            target_time_scale[current_target_time_index] <= interval.maxTime:
                            phonemes.append(interval.mark)
                            current_target_time_index += 1
                        if len(phonemes) == int(data_length /
                                                desired_time_interval):
                            break
                    if len(phonemes) != int(
                            data_length / desired_time_interval):
                        intervals = [
                            'min:{} max:{} mark:{}'.format(
                                interval.minTime, interval.maxTime,
                                interval.mark) for interval in tg.tiers[1]
                        ]
                        ConsoleLogger.error(
                            'Error - min:{} max:{} shifting:{} target_time_scale: {} intervals: {}'
                            .format(interval.minTime, interval.maxTime,
                                    shifting_time, target_time_scale,
                                    intervals))
                        ConsoleLogger.error('#phonemes:{} phonemes:{}'.format(
                            len(phonemes), phonemes))
                    else:
                        extended_alignment_dataset.append(
                            (utterence_key, phonemes))

        with open(
                self._results_path + os.sep +
                'vctk_groundtruth_alignments.pickle', 'wb') as f:
            pickle.dump(
                {
                    'desired_time_interval': desired_time_interval,
                    'extended_alignment_dataset': extended_alignment_dataset,
                    'possible_phonemes': list(possible_phonemes),
                    'phonemes_counter': phonemes_counter,
                    'total_phonemes_apparations': total_phonemes_apparations
                }, f)
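
    # Usage sketch: compute_clustering_metrics() below consumes this pickle, and it
    # can also be inspected manually (results_path stands in for self._results_path):
    #
    #     with open(results_path + os.sep + 'vctk_groundtruth_alignments.pickle', 'rb') as f:
    #         alignments = pickle.load(f)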
Example #7
    def compute_clustering_metrics(self):
        groundtruth_alignments_dic = None
        with open(
                self._results_path + os.sep +
                'vctk_groundtruth_alignments.pickle', 'rb') as f:
            groundtruth_alignments_dic = pickle.load(f)

        empirical_alignments_dic = None
        with open(
                self._results_path + os.sep + self._experiment_name +
                '_vctk_empirical_alignments.pickle', 'rb') as f:
            empirical_alignments_dic = pickle.load(f)

        groundtruth_alignments = np.array(
            groundtruth_alignments_dic['extended_alignment_dataset'])
        possible_phonemes = list(
            groundtruth_alignments_dic['possible_phonemes'])
        empirical_alignments = np.array(
            empirical_alignments_dic['all_alignments'])
        # Map each phoneme label to an integer index so that alignments can be
        # scored with sklearn's clustering metrics
        phonemes_indices = {
            phoneme: i for i, phoneme in enumerate(possible_phonemes)
        }

        ConsoleLogger.status('#{} possible phonemes: {}'.format(
            len(possible_phonemes), possible_phonemes))
        ConsoleLogger.status('# of raw groundtruth alignments: {}'.format(
            len(groundtruth_alignments)))
        ConsoleLogger.status('# of raw empirical alignments: {}'.format(
            len(empirical_alignments)))

        groundtruth_utterance_keys = set()
        final_groundtruth_alignments = list()
        final_empirical_alignments = list()

        # Expected number of labels per alignment: the sample duration in seconds
        # divided by the 0.02 s frame interval used for the groundtruth alignments
        alignment_length = ((self._configuration['length'] /
                             self._configuration['sampling_rate']) * 100) / 2

        for (utterence_key, alignment) in groundtruth_alignments:
            if len(alignment) != alignment_length:  # FIXME
                ConsoleLogger.error(
                    'len(alignment) != alignment_length: {}'.format(
                        len(alignment)))
                continue
            groundtruth_utterance_keys.add(utterence_key)
            final_groundtruth_alignments.append([
                phonemes_indices[alignment[i]] for i in range(len(alignment))
            ])

        for (utterence_key, alignment) in empirical_alignments:
            if utterence_key in groundtruth_utterance_keys:
                final_empirical_alignments.append(alignment)

        final_groundtruth_alignments = np.asarray(final_groundtruth_alignments)
        final_empirical_alignments = np.asarray(final_empirical_alignments)

        ConsoleLogger.status('Groundtruth alignments shape: {}'.format(
            final_groundtruth_alignments.shape))
        ConsoleLogger.status('Empirical alignments shape: {}'.format(
            final_empirical_alignments.shape))

        ConsoleLogger.status('Groundtruth alignments samples: {}'.format(
            [final_groundtruth_alignments[i] for i in range(2)]))
        ConsoleLogger.status('Empirical alignments samples: {}'.format(
            [final_empirical_alignments[i] for i in range(2)]))

        concatenated_groundtruth_alignments = np.concatenate(
            final_groundtruth_alignments)
        concatenated_empirical_alignments = np.concatenate(
            final_empirical_alignments)

        ConsoleLogger.status(
            'Concatenated groundtruth alignments shape: {}'.format(
                concatenated_groundtruth_alignments.shape))
        ConsoleLogger.status(
            'Concatenated empirical alignments shape: {}'.format(
                concatenated_empirical_alignments.shape))

        adjusted_rand_score = sklearn.metrics.adjusted_rand_score(
            concatenated_groundtruth_alignments,
            concatenated_empirical_alignments)
        adjusted_mutual_info_score = sklearn.metrics.adjusted_mutual_info_score(
            concatenated_groundtruth_alignments,
            concatenated_empirical_alignments)
        normalized_mutual_info_score = sklearn.metrics.normalized_mutual_info_score(
            concatenated_groundtruth_alignments,
            concatenated_empirical_alignments)

        ConsoleLogger.success(
            'Adjusted rand score: {}'.format(adjusted_rand_score))
        ConsoleLogger.success('Adjusted mutual info score: {}'.format(
            adjusted_mutual_info_score))
        ConsoleLogger.success(
            'Normalized mutual info score: {}'.format(
                normalized_mutual_info_score))

        with open(
                self._results_path + os.sep + self._experiment_name +
                '_adjusted_rand_score.npy', 'wb') as f:
            np.save(f, adjusted_rand_score)

        with open(
                self._results_path + os.sep + self._experiment_name +
                '_adjusted_mutual_info_score.npy', 'wb') as f:
            np.save(f, adjusted_mutual_info_score)

        with open(
                self._results_path + os.sep + self._experiment_name +
                '_normalized_mutual_info_score.npy', 'wb') as f:
            np.save(f, normalized_mutual_info_score)

        ConsoleLogger.success(
            'All scores from the clustering metrics were successfully saved')
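
    # Usage sketch: the saved scores can be reloaded later, e.g. (results_path and
    # experiment_name stand in for the instance attributes used above):
    #
    #     with open(results_path + os.sep + experiment_name + '_adjusted_rand_score.npy', 'rb') as f:
    #         adjusted_rand_score = np.load(f)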