Exemplo n.º 1
0
def StreamDef(field=None,
              shape=None,
              is_sparse=False,
              transforms=None,
              context=None,
              scp=None,
              mlf=None,
              broadcast=None,
              defines_mb_size=False):
    '''
    Describe one stream for the builtin Deserializers.

    Which keys are meaningful, and what exactly they mean, depends mildly on
    the deserializer the stream is handed to; some keys are ignored by some
    deserializers.

    Args:
        field (`str`, defaults to `None`): name of the stream

         * CTFDeserializer: the name used inside the CTF file
         * ImageDeserializer: either `image` or `label`
         * HTKFeatureDeserializer and HTKMLFDeserializer: must be left at
           the default of None

        shape (`int` or `tuple`, defaults to `None`): dimensions of the
          stream. HTKFeatureDeserializer, HTKMLFDeserializer, and
          CTFDeserializer deliver flat arrays; use
          :func:`~cntk.ops.reshape` afterwards if another shape is needed.
        is_sparse (`bool`, defaults to `False`): whether the data is sparse.
          Forced to `True` whenever mlf is given.
        transforms (`list`, defaults to `None`): transforms the Deserializer
          should apply. Currently only ImageDeserializer supports them.
        context (`tuple`, defaults to `None`): left and right context used
          when reading HTK data. Only supported by HTKFeatureDeserializer.
        scp (`str` or `list`, defaults to `None`): scp files for HTK data
        mlf (`str` or `list`, defaults to `None`): mlf files for HTK data
        broadcast (`bool`, defaults to `None`): whether the features of this
          stream are broadcast over the whole sequence (e.g. ivectors with
          HTK)
        defines_mb_size (`bool`, defaults to False): whether this stream
          defines the minibatch size.

    Returns:
        A `Record` holding the collected settings. Optional settings left at
        `None` are omitted from it.
    '''
    # Giving mlf files overrides whatever was passed for is_sparse.
    settings = {
        'stream_alias': field,
        'is_sparse': True if mlf is not None else is_sparse,
    }

    # Optional keys, in the order they are added to the record.
    optional_entries = (
        ('dim', shape),
        ('transforms', transforms),
        ('context', context),
        ('scp', scp),
        ('mlf', mlf),
        ('broadcast', broadcast),
    )
    for key, value in optional_entries:
        if value is not None:
            settings[key] = value

    # Always present, normalised to a real bool.
    settings['defines_mb_size'] = bool(defines_mb_size)

    return Record(**settings)
Exemplo n.º 2
0
    def __init__(self,
                 deserializers=None,
                 randomize=True,
                 randomization_window=DEFAULT_RANDOMIZATION_WINDOW_IN_CHUNKS,
                 sample_based_randomization_window=False,
                 epoch_size=INFINITELY_REPEAT,
                 distributed_after=INFINITE_SAMPLES,
                 multithreaded_deserializer=None,
                 frame_mode=False,
                 truncation_length=0):
        '''
        Build a minibatch source from a reader configuration.

        All arguments are forwarded unchanged to `_ReaderConfig`, except that
        a `deserializers` value that is neither a list nor a tuple is wrapped
        in a one-element list first (this includes the default `None`).
        The state of the source created by the configuration is then adopted
        by this instance, and its streams are exposed by name through
        `self.streams`.
        '''
        # Accept a single deserializer as well as a list/tuple of them.
        if not isinstance(deserializers, (list, tuple)):
            deserializers = [deserializers]

        config_args = dict(
            deserializers=deserializers,
            randomize=randomize,
            randomization_window=randomization_window,
            sample_based_randomization_window=sample_based_randomization_window,
            epoch_size=epoch_size,
            distributed_after=distributed_after,
            multithreaded_deserializer=multithreaded_deserializer,
            frame_mode=frame_mode,
            truncation_length=truncation_length)
        created = _ReaderConfig(**config_args).minibatch_source()

        # Take over the created source's attribute dictionary wholesale, so
        # this instance shares its state.
        self.__dict__ = created.__dict__

        # Index every stream description by its name and expose the result
        # as a record called streams.
        by_name = {info.m_name: info for info in self.stream_infos()}
        from ..variables import Record
        self.streams = Record(**by_name)
Exemplo n.º 3
0
    def _stream_infos(self, infos=None):
        '''
        Collect the stream descriptions of this deserializer.

        Calls `self.stream_infos()`, appends the result to `infos` when a
        list is supplied, and stores the descriptions by name in
        `self.streams`.

        Args:
            infos (`list`, defaults to `None`): output list that is extended
              in place with the stream descriptions. When omitted, nothing
              is appended and only `self.streams` is updated.

        Raises:
            ValueError: if `self.stream_infos()` returns no streams.
        '''
        inner = self.stream_infos()
        if len(inner) == 0:
            raise ValueError('Deserializer must provide at least one stream')

        # infos defaults to None, so it must not be dereferenced blindly;
        # previously a call without arguments failed with AttributeError.
        if infos is not None:
            infos.extend(inner)

        streams = {si.m_name: si for si in inner}
        self.streams = Record(**streams)
Exemplo n.º 4
0
    def streams(self):
        '''
        Describes the streams 'this' minibatch source produces.

        The description is built on first use from `self.stream_infos()` and
        cached in `self._streams`; later calls return the cached object.

        Returns:
            A `Record` whose entries map input names to instances of
            :class:`StreamInformation`
        '''
        # Fast path: already computed on an earlier call.
        if self._streams is not None:
            return self._streams

        by_name = {info.m_name: info for info in self.stream_infos()}
        self._streams = Record(**by_name)
        return self._streams
Exemplo n.º 5
0
    def __init__(self):
        '''
        Initialise the base class, then expose the streams reported by
        `self.stream_infos()` by name through `self.streams`.
        '''
        super(UserMinibatchSource, self).__init__()

        by_name = {}
        for info in self.stream_infos():
            by_name[info.m_name] = info
        self.streams = Record(**by_name)
Exemplo n.º 6
0
 def __enter__(self):
     '''
     Push this manager's overrides as the current default overrides.

     The new record carries this manager's scope and keyword arguments and
     links to the previously current record through `_outer`.

     Returns:
         This context manager itself.
     '''
     scope = self.scope
     previous = _OptionsContextManager._current_default_overrides
     # The new record becomes the head of the chain of override scopes.
     head = Record(_scope=scope, _outer=previous, **self.kwargs)
     _OptionsContextManager._current_default_overrides = head
     return self