Exemplo n.º 1
0
def test_temporal_array_serialization():
    """Check that a ``TemporalArray`` is unchanged by a ``to_dict``/``from_dict`` round trip."""
    # Single manifests are not serialized to JSON/etc. on their own;
    # the XSet (e.g. CutSet) containers turn them into dicts instead,
    # so the dict round trip is what gets exercised here.
    inner = Array(
        storage_type="lilcom_hdf5",
        storage_path="/tmp/data",
        storage_key="irrelevant",
        shape=[300],
    )
    original = TemporalArray(array=inner, temporal_dim=0, frame_shift=0.3, start=5.0)

    round_tripped = TemporalArray.from_dict(original.to_dict())

    assert original == round_tripped
Exemplo n.º 2
0
    def store_array(
        self,
        key: str,
        value: np.ndarray,
        frame_shift: Optional[Seconds] = None,
        temporal_dim: Optional[int] = None,
        start: Seconds = 0,
    ) -> Union[Array, TemporalArray]:
        """
        Write ``value`` to the underlying storage and build a manifest that
        describes where it was written.

        Arrays with a temporal dimension (e.g. frame-level features, alignments,
        posteriors of an utterance) can be described with ``temporal_dim`` and
        ``frame_shift``, which enables downstream padding, truncating, and
        partial reads. These two arguments must be given together or not at all.

        :param key: An ID that uniquely identifies the array.
        :param value: The array to be stored.
        :param frame_shift: Optional float; for temporal arrays, the time (in seconds)
            between the starts of consecutive frames.
        :param temporal_dim: Optional int; for temporal arrays, the index of the
            dimension that should be interpreted as time.
        :param start: Float; for temporal arrays, the offset of the array w.r.t.
            the start of the recording. Helpful for reading subsets of an array
            computed from a long recording. Ignored for non-temporal arrays.
        :return: A :class:`~lhotse.array.TemporalArray` when both ``frame_shift`` and
            ``temporal_dim`` are given, otherwise a plain :class:`~lhotse.array.Array`.
        """
        # One flag per temporal argument: True when that argument was omitted.
        omitted = [arg is None for arg in [frame_shift, temporal_dim]]
        is_temporal = not any(omitted)
        if not is_temporal:
            # Reject the half-specified case before anything is written to storage.
            assert all(omitted), (
                "frame_shift and temporal_dim have to be both None or both set "
                f"(got frame_shift={frame_shift}, temporal_dim={temporal_dim})."
            )

        # The writer returns the key under which the data was actually stored.
        written_key = self.write(key, value)
        plain_manifest = Array(
            storage_type=self.name,
            storage_path=self.storage_path,
            storage_key=written_key,
            shape=list(value.shape),
        )

        if is_temporal:
            return TemporalArray(
                array=plain_manifest,
                temporal_dim=temporal_dim,
                frame_shift=frame_shift,
                start=start,
            )
        return plain_manifest
Exemplo n.º 3
0
def validate_temporal_array(arr: TemporalArray,
                            read_data: bool = False) -> None:
    """
    Sanity-check the metadata of a temporal array manifest.

    :param arr: The manifest to validate.
    :param read_data: When ``True``, additionally call ``arr.load()`` and check
        that the shape of the loaded data matches the shape declared by ``arr``.
    :raises AssertionError: when any of the checks fails.
    """
    assert arr.temporal_dim >= 0, "TemporalArray: temporal_dim cannot be negative."
    # A valid dimension index is strictly smaller than the number of dimensions.
    assert arr.temporal_dim < arr.ndim, (
        f"TemporalArray: temporal_dim {arr.temporal_dim} "
        f"must be less than ndim {arr.ndim}.")
    assert arr.frame_shift > 0, "TemporalArray: frame_shift must be positive."
    assert arr.start >= 0, "TemporalArray: start must be non-negative."
    if read_data:
        data = arr.load()
        # The manifest may hold its shape as a list while the loaded data
        # (e.g. a numpy array) reports a tuple. A list never compares equal
        # to a tuple, so both sides are normalized before comparing.
        loaded_shape = tuple(data.shape)
        declared_shape = tuple(arr.shape)
        assert loaded_shape == declared_shape, (
            f"TemporalArray: shape of the loaded data {loaded_shape} "
            f"does not match the shape in the manifest {declared_shape}.")
Exemplo n.º 4
0
def test_temporal_array_set_prefix_path():
    """Check that ``with_path_prefix`` changes only the storage path of a ``TemporalArray``."""
    inner = Array(
        storage_type="lilcom_hdf5",
        storage_path="data/train",
        storage_key="irrelevant",
        shape=[300],
    )
    arr = TemporalArray(array=inner, temporal_dim=0, frame_shift=0.3, start=5.0)

    arr1 = arr.with_path_prefix("/newhome")

    # The prefix is joined in front of the original relative storage path.
    assert arr1.array.storage_path == "/newhome/data/train"

    # Everything else about the underlying array manifest stays as it was...
    for field_name in ("storage_type", "storage_key"):
        assert getattr(arr1.array, field_name) == getattr(arr.array, field_name)

    # ...and so does the shape and the temporal metadata.
    for field_name in ("shape", "temporal_dim", "frame_shift", "start"):
        assert getattr(arr1, field_name) == getattr(arr, field_name)