def __init__(self, data_streams, max_samples = INFINITELY_REPEAT):
    '''
    Collect named in-memory data streams and validate that they are consistent.

    For every stream the data, its (possibly completed) variable type, and a
    flag telling whether it is sequence data are recorded under the stream name.

    Args:
        data_streams (dict): maps a stream name to either ``data`` or a
         ``(data, type)`` tuple. ``data`` must be a numpy array, a
         scipy.sparse.csr_matrix, a ``Value``, or a list of those (for a list,
         only the first element is type-checked). ``type`` is passed through
         ``Variable._Type._sanitize`` and must yield a ``Variable._Type``.
        max_samples: stored as ``self._max_samples``; presumably the limit on
         the total number of samples handed out -- it is not enforced here.

    Raises:
        ValueError: if no stream is given, if a type is not a CNTK variable
         type, if a declared shape disagrees with the data's sample shape, or
         if the first stream is empty.
        TypeError: if the data is of an unsupported kind, or if the streams do
         not all have the same number of samples.
    '''
    from cntk import Variable
    if not data_streams:
        raise ValueError('at least one stream must be specified, in the form name=data or name=(data, type)')
    self._data = dict()         # [name] -> numpy.array or scipy.sparse.csr_matrix
    self._types = dict()        # [name] -> Variable._Type
    self._is_sequence = dict()  # [name] -> bool
    self._vars = dict()         # [name] -> Variable
    self._max_samples = max_samples

    # get the data and types from the input, and form streams array
    self._num_samples = -1  # total number of samples --must be the same for all args
    from scipy import sparse
    for name, arg in data_streams.items():
        if isinstance(arg, tuple):
            value, var_type = arg
            var_type = Variable._Type._sanitize(var_type)
            # validate before the type object is inspected any further
            if not isinstance(var_type, Variable._Type):
                raise ValueError('type must be a CNTK variable type, e.g. Tensor[13]')
            dynamic_axes = getattr(var_type, 'dynamic_axes', None)
            # more than one dynamic axis marks sequence data; bool() keeps the
            # stored flag a real bool even when dynamic_axes is None or empty
            is_sequence = bool(dynamic_axes) and len(dynamic_axes) > 1
        else:
            value = arg
            is_sequence = False  # data without type cannot have a dynamic axis
            var_type = Variable._Type(is_sparse=isinstance(value, sparse.csr_matrix))  # shape implanted below

        first_item = value[0] if isinstance(value, list) else value
        if not isinstance(first_item, (np.ndarray, sparse.csr_matrix, Value)):
            raise TypeError('data must be a numpy.array or scipy.sparse.csr_matrix, or a list of those')

        # the leading axis is dropped to obtain the per-sample shape
        sample_shape = value[0].shape[1:] if is_sequence else value.shape[1:]
        if not var_type.shape_is_known:
            var_type = var_type.updated_with(shape=sample_shape)  # implant the shape
        elif var_type.shape != sample_shape:
            # this exception used to be constructed but never raised, so a
            # mismatching shape was silently accepted
            raise ValueError("specified type's shape does not match the data's shape")

        try:
            dtype = value.dtype  # numpy array and Value
        except Exception:
            dtype = value[0].dtype  # for lists
        try:
            var_type.dtype  # probe only: is a dtype already specified?
        except Exception:
            var_type = var_type.updated_with(dtype=dtype)  # implant the dtype

        num_samples = MinibatchSourceFromData._get_len(value)
        if self._num_samples == -1:
            # first stream seen: it defines the sample count for all others
            if num_samples == 0:
                raise ValueError('data is empty')
            self._num_samples = num_samples
        elif self._num_samples != num_samples:
            raise TypeError('all data items must have the same first dimension')

        self._data[name] = value
        self._types[name] = var_type
        self._is_sequence[name] = is_sequence

    self._cursor = 0             # current position
    self._total_num_samples = 0  # total count; once the limit is reached, we stop returning data
    super(MinibatchSourceFromData, self).__init__()
def variable(shape, data_type=None, needs_gradient=False, is_sparse=False,
             dynamic_axes = [Axis.default_dynamic_axis(), Axis.default_batch_axis()],
             name=''):
    '''
    Create an input node.

    The graph requires a separate reader that will be fed to this input.

    Args:
        shape (tuple or int): the shape of the input tensor
        data_type: np.float32 or np.float64
        needs_gradient (bool): whether to back-propagate to it or not
        is_sparse (bool): whether the variable is sparse
        dynamic_axes (list): a list of dynamic axes (e.g., batch axis, time axis)
        name (str): the name of the node in the network

    Returns:
        the object produced by ``Variable(...)`` for the given arguments
    '''
    # NOTE: the default ``dynamic_axes`` list is built once, when this function
    # is defined, and the same list object is handed to every call that relies
    # on the default.
    from .variables import Variable

    # TODO dynamic axis for numpy arrays
    # TODO sparse for numpy arrays
    input_node = Variable(shape, data_type, needs_gradient, is_sparse, dynamic_axes, name)
    return input_node