def as_sequences(self, variable=None):
    '''
    Turn this Value into a list of per-sequence arrays with the masked
    entries removed.

    Args:
        variable: the variable this value belongs to. It is mandatory for
         sparse data, where its ``shape`` is used to obtain the
         sparse-to-dense network. For dense data that carries a mask it
         selects unpacking through ``unpack_variable_value`` instead of
         filtering with the mask by hand.

    Returns:
        list: one SciPy CSR matrix per entry of the densified data if the
        value is sparse. Otherwise one array per sequence; a value without
        a mask and without a shape is wrapped into a one-element list.

    Raises:
        ValueError: if the value is sparse and ``variable`` is None.
    '''
    if self.is_sparse:
        if variable is None:
            raise ValueError('cannot convert sparse value to sequences '
                             'without the corresponding variable')

        to_dense = _sparse_to_dense_network_cache(variable.shape, True,
                                                  self.device)
        warnings.warn('converting Value object to CSR format might be slow')
        # TODO: Add direct conversion, since creating an intermediate array
        # might be slow
        densified = to_dense.eval(self, device=self.device)
        return [sparse.csr_matrix(entry) for entry in densified]

    # Only the presence of a mask matters here, so the base-class mask()
    # result is just compared against None.
    if super(Value, self).mask() is None:
        # This might be costly, but a list is returned for consistency
        # with the masked case.
        dense = self.asarray()
        return list(dense) if dense.shape else [dense]

    if variable is not None:
        unpacked = self.unpack_variable_value(variable, True, cpu())
        return [entry.asarray() for entry in unpacked[0]]

    # No variable available: keep only the steps that the mask does not
    # flag as invalid.
    validity = self.mask
    return [
        entry[validity[pos] != cntk_py.MaskKind_Invalid]
        for pos, entry in enumerate(self.data.asarray())
    ]
def asarray(variable, value):
    '''
    Convert a Value object into a sequence of arrays.

    Args:
        variable: the variable that ``value`` belongs to. Its ``shape`` is
         used to obtain the sparse-to-dense network for sparse values, and
         it is handed to ``Value.to_seq`` for dense ones.
        value: the Value object to convert.

    Returns:
        A list of SciPy CSR matrices if ``value`` is sparse, otherwise the
        result of ``Value.to_seq(value, variable)``.
    '''
    if not value.is_sparse():
        return Value.to_seq(value, variable)

    to_dense = _sparse_to_dense_network_cache(variable.shape)
    warnings.warn('converting Value object to CSR format might be very costly')
    # TODO: Add direct conversion, since creating an intermediate array
    # might be very slow
    densified = to_dense.eval(value, value.device())
    return [sparse.csr_matrix(entry) for entry in densified]
def as_sequences(self, variable=None):
    '''
    Convert a Value into a list of arrays, one per sequence, from which
    the masked entries have been stripped.

    Args:
        variable: the variable corresponding to this value. Required when
         the value is sparse (its ``shape`` is passed to the
         sparse-to-dense network cache). When the value is dense and has a
         mask, a given variable causes the sequences to be unpacked with
         ``unpack_variable_value``; without it the mask is applied
         manually.

    Returns:
        list: SciPy CSR matrices built from the densified data (sparse
        case), or the individual sequence arrays (dense case).

    Raises:
        ValueError: if the value is sparse and no ``variable`` was passed.
    '''
    if self.is_sparse:
        if variable is None:
            raise ValueError('cannot convert sparse value to sequences '
                             'without the corresponding variable')

        dense_net = _sparse_to_dense_network_cache(
            variable.shape, True, self.device)
        warnings.warn('converting Value object to CSR format might be slow')
        # TODO: Add direct conversion, since creating an intermediate array
        # might be slow
        dense_batch = dense_net.eval(self, device=self.device)
        return [sparse.csr_matrix(item) for item in dense_batch]
    else:
        # Check whether a mask exists via the base class; only the
        # comparison against None is needed at this point.
        mask_present = super(Value, self).mask() is not None

        if not mask_present:
            # This might be costly, but we need to return a list for
            # consistency.
            whole = self.asarray()
            if whole.shape:
                return list(whole)
            return [whole]

        if variable is None:
            keep = self.mask
            sequences = []
            for row, item in enumerate(self.data.asarray()):
                # Drop every step the mask marks as invalid.
                sequences.append(item[keep[row] != cntk_py.MaskKind_Invalid])
            return sequences

        unpacked = self.unpack_variable_value(variable, True, cpu())
        return [item.asarray() for item in unpacked[0]]
def asarray(self):
    '''
    Converts the instance's data to a NumPy array.

    The array view holding the data is looked up according to the type of
    the instance. Constant, Parameter, NDArrayView, NDMask, Value and
    MinibatchData (checked in that order) are handled; anything else is
    rejected.

    Returns:
        For dense data, the result of ``to_ndarray()`` on the underlying
        array view. For sparse data, the data is evaluated through the
        sparse-to-dense network and returned as a SciPy CSR matrix, or as
        a list of CSR matrices if that evaluation yields a list.

    Raises:
        TypeError: if the instance is of none of the supported types.
        ValueError: if sparse data densifies to an array of rank > 2,
         which cannot be represented as a CSR matrix.
    '''
    import cntk

    if isinstance(self, cntk.Constant):
        # The Python-level classes are tested before the raw cntk_py ones
        # and go through the base-class value().
        ndav = super(cntk.Constant, self).value()
        is_sparse = ndav.is_sparse()
    elif isinstance(self, cntk.Parameter):
        ndav = super(cntk.Parameter, self).value()
        is_sparse = ndav.is_sparse()
    elif isinstance(self, (cntk.cntk_py.Constant, cntk.cntk_py.Parameter)):
        ndav = self.value()
        is_sparse = ndav.is_sparse()
    elif isinstance(self, (cntk.cntk_py.NDArrayView, cntk.cntk_py.NDMask)):
        ndav = self
        if isinstance(self, cntk.NDArrayView):
            # Read as an attribute on the Python-level class ...
            is_sparse = ndav.is_sparse
        elif isinstance(self, cntk.cntk_py.NDArrayView):
            # ... but called as a method on the raw binding.
            is_sparse = ndav.is_sparse()
        else:
            # NDMask
            is_sparse = False
    # Value and MinibatchData have a mask, which means that we need the
    # corresponding Variable to do the proper conversion. For easy
    # discoverability, we nevertheless add asarray() to those classes as
    # well, but issue a warning.
    elif isinstance(self, (cntk.cntk_py.Value, cntk.cntk_py.MinibatchData)):
        if isinstance(self, cntk.cntk_py.MinibatchData):
            value = self.data
        else:
            value = self

        if isinstance(value, cntk.Value):
            is_sparse = value.is_sparse
            has_mask = super(cntk.Value, value).mask() is not None
            ndav = value.data
        else:
            is_sparse = value.is_sparse()
            has_mask = value.mask() is not None
            ndav = value.data()

        if has_mask:
            warnings.warn('asarray() will ignore the mask information. '
                          'Please use as_sequences() to do the proper '
                          'conversion.')
    else:
        # Without this branch an unsupported instance would fall through
        # with ndav/is_sparse unbound and fail with an UnboundLocalError.
        raise TypeError('asarray() does not support objects of type %s'
                        % type(self).__name__)

    if not is_sparse:
        return ndav.to_ndarray()

    from cntk.internal.sanitize import _sparse_to_dense_network_cache

    # device is a property on some of the handled classes and a method on
    # others, hence the callable() check.
    device = ndav.device
    if callable(device):
        device = device()

    network = _sparse_to_dense_network_cache(ndav.shape[1:], False, device)
    warnings.warn('converting Value object to CSR format might be slow')
    dense_data = network.eval(self, device=device)

    def to_csr(dense):
        if len(dense.shape) > 2:
            raise ValueError(
                'Cannot convert a sparse NDArrayView or Value object '
                'with shape %s of rank > 2 to a scipy.csr matrix.'
                % str(dense.shape))
        return sparse.csr_matrix(dense)

    if isinstance(dense_data, list):
        return [to_csr(d) for d in dense_data]
    return to_csr(dense_data)
def asarray(self):
    '''
    Converts the instance's data to a NumPy array.

    Supported instances are Constant, Parameter, NDArrayView, NDMask,
    Value and MinibatchData; the type checks run in that order to find
    the array view that holds the data. Any other type is rejected.

    Returns:
        The result of ``to_ndarray()`` on the underlying array view if the
        data is dense. If it is sparse, the output of the sparse-to-dense
        network converted to a SciPy CSR matrix (or a list of CSR matrices
        when the network output is a list).

    Raises:
        TypeError: if the instance is of none of the supported types.
        ValueError: if sparse data densifies to an array of rank > 2,
         which cannot be represented as a CSR matrix.
    '''
    import cntk

    if isinstance(self, cntk.Constant):
        # Python-level wrappers are matched first and use the base-class
        # value(); the raw cntk_py classes are handled further down.
        ndav = super(cntk.Constant, self).value()
        is_sparse = ndav.is_sparse()
    elif isinstance(self, cntk.Parameter):
        ndav = super(cntk.Parameter, self).value()
        is_sparse = ndav.is_sparse()
    elif isinstance(self, (cntk.cntk_py.Constant, cntk.cntk_py.Parameter)):
        ndav = self.value()
        is_sparse = ndav.is_sparse()
    elif isinstance(self, (cntk.cntk_py.NDArrayView, cntk.cntk_py.NDMask)):
        ndav = self
        if isinstance(self, cntk.NDArrayView):
            # Attribute access on the Python-level class.
            is_sparse = ndav.is_sparse
        elif isinstance(self, cntk.cntk_py.NDArrayView):
            # Method call on the raw binding.
            is_sparse = ndav.is_sparse()
        else:
            # NDMask
            is_sparse = False
    # Value and MinibatchData have a mask, which means that we need the
    # corresponding Variable to do the proper conversion. For easy
    # discoverability, we nevertheless add asarray() to those classes as
    # well, but issue a warning.
    elif isinstance(self, (cntk.cntk_py.Value, cntk.cntk_py.MinibatchData)):
        if isinstance(self, cntk.cntk_py.MinibatchData):
            value = self.data
        else:
            value = self

        if isinstance(value, cntk.Value):
            is_sparse = value.is_sparse
            has_mask = super(cntk.Value, value).mask() is not None
            ndav = value.data
        else:
            is_sparse = value.is_sparse()
            has_mask = value.mask() is not None
            ndav = value.data()

        if has_mask:
            warnings.warn('asarray() will ignore the mask information. '
                          'Please use as_sequences() to do the proper '
                          'conversion.')
    else:
        # Fail with a clear message instead of the UnboundLocalError that
        # the missing ndav/is_sparse bindings would otherwise trigger.
        raise TypeError('asarray() does not support objects of type %s'
                        % type(self).__name__)

    if not is_sparse:
        return ndav.to_ndarray()

    from cntk.internal.sanitize import _sparse_to_dense_network_cache

    # device may be either a property or a method depending on the class
    # of ndav, so call it only when it is callable.
    device = ndav.device
    if callable(device):
        device = device()

    network = _sparse_to_dense_network_cache(ndav.shape[1:], False, device)
    warnings.warn('converting Value object to CSR format might be slow')
    dense_data = network.eval(self, device=device)

    def to_csr(dense):
        if len(dense.shape) > 2:
            raise ValueError(
                'Cannot convert a sparse NDArrayView or Value object '
                'with shape %s of rank > 2 to a scipy.csr matrix.'
                % str(dense.shape))
        return sparse.csr_matrix(dense)

    if isinstance(dense_data, list):
        return [to_csr(d) for d in dense_data]
    return to_csr(dense_data)