def _extract(data_file_name, settings_features, settings_data, dir_output_dev,
             dir_output_eva):
    """Extracts the features of one data file and serializes the result.

    The stored record is loaded, features are computed from its
    ``audio_data`` field, and a one-element recarray with the original
    fields plus the features is dumped under the development or the
    evaluation output directory, using the name of the input file.

    :param data_file_name: Path of the data file to process.
    :type data_file_name: pathlib.Path (assumed; only ``.parent.name`` \
                          and ``.name`` are used here)
    :param settings_features: Settings for feature extraction. The keys \
                              ``'process'`` and ``'keep_raw_audio_data'`` \
                              are read.
    :type settings_features: dict[str, T]
    :param settings_data: Settings for creating data files. The key \
                          ``['audio_dirs']['development']`` is read.
    :type settings_data: dict[str, T]
    :param dir_output_dev: Output directory for development files.
    :type dir_output_dev: pathlib.Path
    :param dir_output_eva: Output directory for all other files.
    :type dir_output_eva: pathlib.Path
    """
    record = load_numpy_object(data_file_name)

    def _column(field_name):
        # One column of the output record: (name, value, dtype), copied
        # from the loaded record.
        field = record[field_name]
        return field_name, field.item(), field.dtype

    # Compute the features from the stored audio.
    features = feature_extraction(record['audio_data'].item(),
                                  **settings_features['process'])

    # Collect the columns in the order they are stored on disk.
    columns = [_column('file_name')]

    # The raw audio is stored only on request.
    if settings_features['keep_raw_audio_data']:
        columns.append(_column('audio_data'))

    columns.append(('features', features, np.dtype(object)))
    columns.extend(
        _column(field_name)
        for field_name in ('caption', 'caption_ind',
                           'words_ind', 'chars_ind'))

    # Split the columns into the row values and the dtype description.
    row = tuple(value for _, value, _ in columns)
    dtypes = [(field_name, dtype) for field_name, _, dtype in columns]

    np_rec_array = np.rec.array([row], dtype=dtypes)

    # Files whose parent directory is the development directory go to the
    # development output; everything else goes to the evaluation output.
    if data_file_name.parent.name == settings_data['audio_dirs']['development']:
        target_dir = dir_output_dev
    else:
        target_dir = dir_output_eva

    dump_numpy_object(np_rec_array, target_dir.joinpath(data_file_name.name))
def extract_features_test(root_dir: str,
                          settings_data: MutableMapping[str, Any],
                          settings_features: MutableMapping[str, Any],
                          settings_audio: MutableMapping[str, Any]) \
        -> None:
    """Extracts test features from the audio data of Clotho.

    Every regular file of the test audio directory is loaded, its
    features are extracted, and a one-element recarray (file name,
    optionally the raw audio, the features, and the indices of the
    ``'<sos>'`` and ``'<eos>'`` tokens) is serialized to the test
    features directory.

    :param root_dir: Root dir for the data.
    :type root_dir: str
    :param settings_data: Settings for creating data files.
    :type settings_data: dict[str, T]
    :param settings_features: Settings for feature extraction.
    :type settings_features: dict[str, T]
    :param settings_audio: Settings for the audio.
    :type settings_audio: dict
    :raises AttributeError: If the test audio directory does not exist,
        is not a directory, or is empty.
    """
    # Get the root directory.
    dir_root = Path(root_dir)

    # Get the directory of the test audio files.
    dir_test = dir_root.joinpath(settings_data['audio_dirs']['downloaded'],
                                 settings_data['audio_dirs']['test'])

    # `is_dir` also rejects a plain file at that path, and `any` stops at
    # the first entry instead of listing the whole directory.
    if not (dir_test.is_dir() and any(dir_test.iterdir())):
        # AttributeError is kept so existing callers that catch it still work.
        raise AttributeError(
            'Testing workflow selected, but could not find the test set audio files. '
            'Please download the test set audio before making test predictions.'
        )

    # Get the directory for output.
    dir_output_test = dir_root.joinpath(
        settings_data['features_dirs']['output'],
        settings_data['features_dirs']['test'])

    words_list = load_pickle_file(
        dir_root.joinpath(settings_data['pickle_files_dir'],
                          settings_data['files']['words_list_file_name']))

    # Create the directory.
    dir_output_test.mkdir(parents=True, exist_ok=True)

    # Loop-invariant values, computed once instead of once per audio file.
    # Word indices are required for the dataloader to work.
    sos_eos_indices = [words_list.index('<sos>'), words_list.index('<eos>')]
    # Test files have no captions, so the caption index is dropped from
    # the file name template.
    file_template = settings_data['files'][
        'np_file_name_template'].replace('_{caption_index}', '')

    # Apply the function to each file and save the result.
    for data_file_name in (entry for entry in dir_test.iterdir()
                           if entry.is_file()):
        # Load the audio.
        audio = load_audio_file(audio_file=str(data_file_name),
                                sr=int(settings_audio['sr']),
                                mono=settings_audio['to_mono'])

        # Extract the features.
        features = feature_extraction(audio, **settings_features['process'])

        # Populate the recarray data and dtypes.
        array_data = (data_file_name.name, )
        dtypes = [('file_name', f'U{len(data_file_name.name)}')]

        # Check if we are keeping the raw audio data.
        if settings_features['keep_raw_audio_data']:
            # And add them to the recarray data and dtypes.
            array_data += (audio, )
            dtypes.append(('audio_data', audio.dtype))

        # Add the rest to the recarray; each record gets its own index array.
        array_data += (features, np.array(sos_eos_indices))
        dtypes.extend([('features', np.dtype(object)),
                       ('words_ind', np.dtype(object))])

        # Make the recarray.
        np_rec_array = np.rec.array([array_data], dtype=dtypes)

        # Make the path for serializing the recarray.
        file_path = dir_output_test.joinpath(
            file_template.format(audio_file_name=data_file_name.name))

        # Dump it.
        dump_numpy_object(np_rec_array, file_path)
def extract_features(root_dir: str,
                     settings_data: MutableMapping[str, Any],
                     settings_features: MutableMapping[str, Any]) \
        -> None:
    """Extracts features from the audio data of Clotho.

    Every ``.npy`` file of the development and evaluation data
    directories is loaded, features are extracted from its
    ``audio_data`` field, and a one-element recarray with the original
    fields plus the features is serialized, under the same file name, to
    the features directory of the split the file was read from.

    :param root_dir: Root dir for the data.
    :type root_dir: str
    :param settings_data: Settings for creating data files.
    :type settings_data: dict[str, T]
    :param settings_features: Settings for feature extraction.
    :type settings_features: dict[str, T]
    """
    # Get the root directory.
    dir_root = Path(root_dir)

    # Get the base directories of the data files and of the features.
    dir_data = dir_root.joinpath(settings_data['audio_dirs']['output'])
    dir_features = dir_root.joinpath(settings_data['features_dirs']['output'])

    # Pair each input directory with its output directory. Routing then
    # follows the directory a file was read from, not a comparison of its
    # parent name with the configured string (which fails for names with
    # several path components or a trailing separator).
    splits = [
        (dir_data.joinpath(settings_data['audio_dirs'][split]),
         dir_features.joinpath(settings_data['features_dirs'][split]))
        for split in ('development', 'evaluation')]

    # Create the output directories before any processing starts.
    for _, dir_out in splits:
        dir_out.mkdir(parents=True, exist_ok=True)

    # Apply the function to each file and save the result.
    for dir_in, dir_out in splits:
        for data_file_name in dir_in.iterdir():
            if data_file_name.suffix != '.npy':
                continue

            # Load the data file.
            data_file = load_numpy_object(data_file_name)

            # Extract the features.
            features = feature_extraction(data_file['audio_data'].item(),
                                          **settings_features['process'])

            # Populate the recarray data and dtypes.
            array_data = (data_file['file_name'].item(), )
            dtypes = [('file_name', data_file['file_name'].dtype)]

            # Check if we are keeping the raw audio data.
            if settings_features['keep_raw_audio_data']:
                # And add them to the recarray data and dtypes.
                array_data += (data_file['audio_data'].item(), )
                dtypes.append(('audio_data', data_file['audio_data'].dtype))

            # Add the rest to the recarray.
            array_data += (features, data_file['caption'].item(),
                           data_file['caption_ind'].item(),
                           data_file['words_ind'].item(),
                           data_file['chars_ind'].item())
            dtypes.extend([('features', np.dtype(object)),
                           ('caption', data_file['caption'].dtype),
                           ('caption_ind', data_file['caption_ind'].dtype),
                           ('words_ind', data_file['words_ind'].dtype),
                           ('chars_ind', data_file['chars_ind'].dtype)])

            # Make the recarray.
            np_rec_array = np.rec.array([array_data], dtype=dtypes)

            # Dump it next to the other features of the same split.
            dump_numpy_object(np_rec_array,
                              dir_out.joinpath(data_file_name.name))
# Example #4
# 0
def create_split_data(csv_split: MutableSequence[MutableMapping[str, str]],
                      dir_split: Path, dir_audio: Path, dir_root: Path,
                      words_list: MutableSequence[str],
                      chars_list: MutableSequence[str],
                      settings_ann: MutableMapping[str, Any],
                      settings_audio: MutableMapping[str, Any],
                      settings_output: MutableMapping[str, Any]) -> None:
    """Creates and serializes the data of one split.

    For every annotation entry the audio file is loaded once; then, for
    each of its captions, one record holding the file name, the audio,
    the caption, the caption index, and the word and character indices
    of the caption is dumped to ``dir_split``.

    :param csv_split: Annotations of the split.
    :type csv_split: list[collections.OrderedDict]
    :param dir_split: Directory for the split.
    :type dir_split: pathlib.Path
    :param dir_audio: Directory of the audio files for the split.
    :type dir_audio: pathlib.Path
    :param dir_root: Root directory of data.
    :type dir_root: pathlib.Path
    :param words_list: List of the words.
    :type words_list: list[str]
    :param chars_list: List of the characters.
    :type chars_list: list[str]
    :param settings_ann: Settings for the annotations.
    :type settings_ann: dict
    :param settings_audio: Settings for the audio.
    :type settings_audio: dict
    :param settings_output: Settings for the output files.
    :type settings_output: dict
    """
    # The output directory has to exist before anything is dumped.
    dir_split.mkdir(parents=True, exist_ok=True)

    # Names of the annotation columns that hold the captions (1-based).
    nb_captions = int(settings_ann['nb_captions'])
    captions_fields = [
        settings_ann['captions_fields_prefix'].format(caption_number)
        for caption_number in range(1, nb_captions + 1)
    ]

    for csv_entry in csv_split:
        file_name_audio = csv_entry[settings_ann['audio_file_column']]

        # The audio is loaded once and shared by all captions of the file.
        audio_path = dir_root.joinpath(dir_audio, file_name_audio)
        audio = load_audio_file(audio_file=str(audio_path),
                                sr=int(settings_audio['sr']),
                                mono=settings_audio['to_mono'])

        for caption_ind, caption_field in enumerate(captions_fields):
            caption = csv_entry[caption_field]

            # Words of the caption.
            words_caption = get_sentence_words(
                caption,
                unique=settings_ann['use_unique_words_per_caption'],
                keep_case=settings_ann['keep_case'],
                remove_punctuation=settings_ann['remove_punctuation_words'],
                remove_specials=not settings_ann['use_special_tokens'])

            # Characters of the caption, flattened into one list.
            cleaned_caption = clean_sentence(
                caption,
                keep_case=settings_ann['keep_case'],
                remove_punctuation=settings_ann['remove_punctuation_chars'],
                remove_specials=True)
            chars_caption = list(chain.from_iterable(cleaned_caption))

            # Wrap the characters in the special tokens, separated by spaces.
            if settings_ann['use_special_tokens']:
                chars_caption = ['<sos>', ' '] + chars_caption + [' ', '<eos>']

            # Map the words and the characters to their list positions.
            indices_words = list(map(words_list.index, words_caption))
            indices_chars = list(map(chars_list.index, chars_caption))

            # Describe the record, then build it as a recarray.
            record_dtype = [
                ('file_name', 'U{}'.format(len(file_name_audio))),
                ('audio_data', np.dtype(object)),
                ('caption', 'U{}'.format(len(caption))),
                ('caption_ind', 'i4'),
                ('words_ind', np.dtype(object)),
                ('chars_ind', np.dtype(object)),
            ]
            record = np.array(
                (file_name_audio, audio, caption, caption_ind,
                 np.array(indices_words), np.array(indices_chars)),
                dtype=record_dtype)
            np_rec_array = np.rec.array(record)

            # Save the record to disk, one file per audio and caption.
            output_name = settings_output['file_name_template'].format(
                audio_file_name=file_name_audio,
                caption_index=caption_ind)
            dump_numpy_object(np_obj=np_rec_array,
                              file_name=str(dir_split.joinpath(output_name)))