Example 1
import h5py
import numpy as np
from tqdm import tqdm

import utils  # project-local helpers (provides pad_truncate)


def load_features(path, chunk_size=128, r_threshold=32):
    """Load feature vectors from the specified HDF5 file.

    Since the original feature vectors are of variable length, this
    function partitions them into chunks of length `chunk_size`. When
    they cannot be partitioned exactly, one of three things can happen:

      * If the length of the vector is less than the chunk size, the
        vector is simply padded with a fill value.
      * If the remainder, ``r``, is less than ``r_threshold``, the edges
        of the vector are truncated so that it can be partitioned.
      * If the remainder, ``r``, is greater than or equal to
        ``r_threshold``, the last chunk consists of the last
        `chunk_size` frames of the feature vector, so that it overlaps
        with the penultimate chunk.

    Args:
        path (str): Path to the HDF5 file.
        chunk_size (int): Size of a chunk.
        r_threshold (int): Threshold for ``r`` (see above).

    Returns:
        tuple: An np.ndarray containing the feature chunks, and a list
            giving the number of chunks for each audio clip.
    """
    chunks = []
    n_chunks = []
    with h5py.File(path, 'r') as f:
        feats = f['F']
        shape = feats.attrs['shape']
        for i, feat in enumerate(tqdm(feats)):
            # Reshape flat array to original shape
            feat = np.reshape(feat, (-1, *shape))

            if len(feat) == 0:
                n_chunks.append(0)
                continue

            # Split feature vector into chunks along time axis
            q = len(feat) // chunk_size
            r = len(feat) % chunk_size
            if not q and r:
                # Vector is shorter than one chunk: pad to chunk_size
                split = [
                    utils.pad_truncate(feat,
                                       chunk_size,
                                       pad_value=np.min(feat))
                ]
            elif r:
                # Trim the edges when the remainder is small; otherwise
                # keep the tail as an extra, overlapping chunk below
                off = r // 2 if r < r_threshold else 0
                split = np.split(feat[off:q * chunk_size + off], q)
                if r >= r_threshold:
                    split.append(feat[-chunk_size:])
            else:
                # Length is an exact multiple of chunk_size
                split = np.split(feat, q)

            n_chunks.append(len(split))
            chunks += split

    return np.array(chunks), n_chunks
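
Both examples call `utils.pad_truncate`, a project-local helper that is not shown on this page. A minimal sketch of such a helper, assuming it pads or truncates along the time (first) axis to a target length, might look like this:

import numpy as np

def pad_truncate(x, length, pad_value=0):
    """Pad or truncate `x` along the first axis to exactly `length` frames."""
    if len(x) >= length:
        return x[:length]
    # Pad only the first (time) axis; leave feature axes untouched
    pad_width = [(0, length - len(x))] + [(0, 0)] * (x.ndim - 1)
    return np.pad(x, pad_width, mode='constant', constant_values=pad_value)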
Example 2
def _reshape_spec(feat, r_threshold=32):
    # The chunk size is hard-coded to 128 frames in this variant
    q = feat.shape[0] // 128
    r = feat.shape[0] % 128

    if not q:
        # Shorter than one chunk: pad to 128 frames
        split = [utils.pad_truncate(feat, 128, pad_value=np.min(feat))]
    else:
        # Trim the edges when the remainder is small; otherwise keep
        # the tail as an extra, overlapping chunk
        off = r // 2 if r < r_threshold else 0
        split = np.split(feat[off:q * 128 + off], q)
        if r >= r_threshold:
            split.append(feat[-128:])
    return np.array(split)
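
A quick shape check of this chunking logic (the toy spectrogram below is illustrative; 300 frames with 64 feature bins is an assumption, not from the source):

spec = np.random.rand(300, 64).astype(np.float32)
chunks = _reshape_spec(spec)
# 300 = 2 * 128 + 44, and 44 >= r_threshold, so the tail is kept as an
# overlapping third chunk
print(chunks.shape)  # (3, 128, 64)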
Example 3
import datetime as dt
import os

import h5py
import librosa
import numpy as np
from tqdm import tqdm

import data_augmentation as aug  # project-local
import utils  # project-local helpers (provides pad_truncate)


def extract_dataset(dataset_path,
                    file_names,
                    extractor,
                    clip_duration,
                    output_path,
                    recompute=False,
                    n_transforms_iter=None,
                    ):
    """Extract features from the audio clips in a dataset.

    Args:
        dataset_path (str): Path of directory containing dataset.
        file_names (list): List of file names for the audio clips.
        extractor: Class instance used for feature extraction.
        clip_duration (float): Duration of a reference clip in seconds.
            Used to ensure all feature vectors are of the same length.
        output_path (str): File path of the output HDF5 file.
        recompute (bool): Whether to extract features that already exist
            in the HDF5 file.
        n_transforms_iter (iterator): Iterator for the number of
            transformations to apply for each example. If data
            augmentation should be disabled, set this to ``None``.
            Otherwise, ensure that `file_names` has been expanded as if
            by calling :func:`data_augmentation.expand_metadata`.
    """
    # Create/load the HDF5 file to store the feature vectors
    with h5py.File(output_path, 'a') as f:
        size = len(file_names)  # Size of dataset

        # Create/load feature vector dataset and timestamp dataset
        feats_shape = (size,) + extractor.output_shape(clip_duration)
        feats = f.require_dataset('F', feats_shape, dtype=np.float32)
        timestamps = f.require_dataset('timestamps', (size,),
                                       dtype=h5py.special_dtype(vlen=bytes))

        # Sentinel: empty iterator, replaced per clip when DA is enabled
        transforms = iter(())

        for i, name in enumerate(tqdm(file_names)):
            # Skip if existing feature vector should not be recomputed
            if timestamps[i] and not recompute:
                next(transforms, None)
                continue

            # Generate next transform or, if iterator is empty, load
            # the next audio clip from disk. Note that the iterator will
            # always be empty if data augmentation (DA) is disabled.
            x = next(transforms, None)
            if x is None:
                # Load audio file from disk
                path = os.path.join(dataset_path, name)
                x, sample_rate = librosa.load(path, sr=None)

                # Create new transform generator if DA is enabled
                if n_transforms_iter:
                    transforms = aug.transformations(
                        x, sample_rate, next(n_transforms_iter))

            # Compute feature vector using extractor
            vec = extractor.extract(x, sample_rate)
            vec = utils.pad_truncate(vec, feats_shape[1])

            # Save to dataset
            feats[i] = vec
            # Record timestamp in ISO format
            timestamps[i] = dt.datetime.now().isoformat()
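
A hypothetical invocation, assuming an extractor object that exposes the `output_shape(clip_duration)` and `extract(x, sample_rate)` methods used above (the `LogMelExtractor` name and the paths are illustrative, not from the source):

# Illustrative usage; the extractor class and paths are assumptions
extractor = LogMelExtractor(sample_rate=32000, n_mels=64)
file_names = sorted(os.listdir('dataset/audio'))
extract_dataset('dataset/audio',
                file_names,
                extractor,
                clip_duration=10.0,  # seconds
                output_path='features.h5',
                recompute=False)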