def create_batch(dataset, hparams, is_training, batch_size=None):
    """Group the elements of `dataset` into fixed-size batches.

    Args:
      dataset: Object exposing `batch` / `padded_batch`; it is used here the
        way a `tf.data.Dataset` would be.
      hparams: Hyperparameter object. Reads `batch_size`,
        `max_expected_train_example_len`, `split_pianoroll` and, on the
        split-pianoroll path, `timbre_num_classes`.
      is_training: Whether the batches are meant for training.
      batch_size: Optional override. Any falsy value (None, 0) falls back to
        `hparams.batch_size`.

    Returns:
      The batched dataset. The incomplete final batch is dropped on every
      path.
    """
    batch_size = batch_size or hparams.batch_size

    # Training with a configured maximum example length uses plain batching,
    # so no padded shapes are supplied.
    if hparams.max_expected_train_example_len and is_training:
        return dataset.batch(batch_size, drop_remainder=True)

    # NOTE(review): 229 and 88 look like spectrogram bins and piano keys --
    # confirm against the feature extraction code.
    if hparams.split_pianoroll:
        label_dims = [None, 88, hparams.timbre_num_classes + 1]
        feature_shapes = MultiFeatureTensors(
            TensorShape([None, 229, 1]),
            TensorShape([None, 229, 1]),
            TensorShape([hparams.timbre_num_classes]))
    else:
        label_dims = [None, 88]
        feature_shapes = FeatureTensors(TensorShape([None, 229, 1]))

    # All three label tensors share the same padded shape.
    label_shapes = LabelTensors(
        TensorShape(label_dims),
        TensorShape(label_dims),
        TensorShape(label_dims),
    )
    return dataset.padded_batch(
        batch_size,
        padded_shapes=(feature_shapes, label_shapes),
        drop_remainder=True)
def _get_test_input_function(self):
    """
    Build the test input pipeline from `self._yield_test_samples`.

    The generator output is declared as a `(float32, bool, bool)` triple with
    static shapes `(frames_per_sample, neff)`, `(frames_per_sample, neff)`
    and `(frames_per_sample, neff, 2)`. Elements are passed through
    `self.feature_map_func`, batched (an incomplete final batch is dropped),
    prefetched and finally cached to the file "test_data_cache" inside
    `self.iterator_dir`.

    :return: the resulting dataset object (the dataset itself, not a callable)
    """
    hparams = self._hparams
    frames = hparams.frames_per_sample
    neff = hparams.neff

    element_types = (tf.float32, tf.bool, tf.bool)
    element_shapes = (
        TensorShape([Dimension(frames), Dimension(neff)]),
        TensorShape([Dimension(frames), Dimension(neff)]),
        TensorShape([Dimension(frames), Dimension(neff), Dimension(2)]),
    )

    dataset = tf.data.Dataset.from_generator(
        self._yield_test_samples,
        element_types,
        output_shapes=element_shapes)

    # The cache is applied last, so it stores already batched elements.
    dataset = (
        dataset
        .map(self.feature_map_func,
             num_parallel_calls=hparams.num_parallel_calls)
        .batch(batch_size=hparams.batch_size, drop_remainder=True)
        .prefetch(hparams.prefetch_size)
        .cache(filename=os.path.join(self.iterator_dir, "test_data_cache"))
    )

    print_info("Dataset output sizes are: ")
    print_info(dataset.output_shapes)
    return dataset
def _get_test_input_function(self):
    """
    Build the test input pipeline from `self._yield_test_samples`.

    The generator output is declared as a `(float32, bool, bool)` triple with
    static shapes `(frames_per_sample, neff)`, `(frames_per_sample, neff)`
    and `(frames_per_sample, neff, 2)`. The first two tensors are packed into
    a feature dict keyed by `self.FEATURE_1_NAME` / `self.FEATURE_2_NAME`, the
    third becomes the label. The result is batched (an incomplete final batch
    is dropped) and prefetched; no caching is applied.

    :return: the resulting dataset object (the dataset itself, not a callable)
    """
    hparams = self._hparams
    frames = hparams.frames_per_sample
    neff = hparams.neff

    element_types = (tf.float32, tf.bool, tf.bool)
    element_shapes = (
        TensorShape([Dimension(frames), Dimension(neff)]),
        TensorShape([Dimension(frames), Dimension(neff)]),
        TensorShape([Dimension(frames), Dimension(neff), Dimension(2)]),
    )

    def _as_features_and_label(first, second, target):
        # Turn the flat generator triple into (features dict, label).
        features = {
            self.FEATURE_1_NAME: first,
            self.FEATURE_2_NAME: second,
        }
        return features, target

    dataset = tf.data.Dataset.from_generator(
        self._yield_test_samples,
        element_types,
        output_shapes=element_shapes)
    dataset = (
        dataset
        .map(_as_features_and_label)
        .batch(batch_size=hparams.batch_size, drop_remainder=True)
        .prefetch(hparams.prefetch_size)
    )

    print_info("Dataset output sizes are: ")
    print_info(dataset.output_shapes)
    return dataset
def _user_resize_func(self, sample, vad, label):
    """
    Give the three tensors static shapes and package them as
    `(features, label)`. Per the original note this is meant to run after a
    `tf.py_func` call.

    :param sample: tensor reshaped to `(dummy_slicing_dim, neff)`
    :param vad: tensor reshaped to `(dummy_slicing_dim, neff)`
    :param label: tensor reshaped to `(dummy_slicing_dim, neff, 2)`
    :return: `({FEATURE_1_NAME: sample, FEATURE_2_NAME: vad}, label)`
    """
    rows = self._hparams.dummy_slicing_dim
    cols = self._hparams.neff

    sample = tf.reshape(
        sample, shape=TensorShape([Dimension(rows), Dimension(cols)]))
    vad = tf.reshape(
        vad, shape=TensorShape([Dimension(rows), Dimension(cols)]))
    label = tf.reshape(
        label,
        shape=TensorShape([Dimension(rows), Dimension(cols), Dimension(2)]))

    features = {self.FEATURE_1_NAME: sample, self.FEATURE_2_NAME: vad}
    return (features, label)
def _multi_worker_init(**kwargs):
    """Initialise the plugin on one replica of a multi-worker group.

    The replica whose `replica_id_in_sync_group` is 0 obtains an NCCL unique
    id and a seed and sends both with `collective_ops.broadcast_send`; every
    other replica obtains them with `collective_ops.broadcast_recv`. The
    received values are then handed to `kit_lib.plugin_init`.

    Keyword Args:
      seed: Optional seed. On replica 0 a falsy value is replaced by
        `kit_lib.gen_random_seed()`. A warning is logged when a truthy local
        seed compares unequal to the broadcast one.
      global_batch_size: Required; forwarded to `kit_lib.plugin_init`.

    Returns:
      Whatever `kit_lib.plugin_init` returns.

    Raises:
      KeyError: if `global_batch_size` is not supplied.
    """
    replica_ctx = get_replica_context()
    global_id = replica_ctx.replica_id_in_sync_group
    group_size = replica_ctx.num_replicas_in_sync
    requested_seed = kwargs.get("seed", None)

    # instance_key 2 carries the NCCL id, instance_key 3 carries the seed.
    if global_id == 0:
        unique_id = kit_lib.get_nccl_unique_id()
        re = collective_ops.broadcast_send(
            unique_id,
            TensorShape([32]),
            int32,
            group_size=group_size,
            group_key=1,
            instance_key=2)
        global_seed = requested_seed or kit_lib.gen_random_seed()
        re_seed = collective_ops.broadcast_send(
            global_seed,
            TensorShape([1]),
            int64,
            group_size=group_size,
            group_key=1,
            instance_key=3)
    else:
        re = collective_ops.broadcast_recv(
            TensorShape([32]),
            int32,
            group_size=group_size,
            group_key=1,
            instance_key=2)
        global_seed = requested_seed
        re_seed = collective_ops.broadcast_recv(
            TensorShape([1]),
            int64,
            group_size=group_size,
            group_key=1,
            instance_key=3)

    if global_seed and global_seed != re_seed:
        logging.warning(
            "The seed: {} is not consistent with that from cheif-node: {}, "
            "and the seed from cheif-node will be used.".format(
                global_seed, re_seed))

    visible_devices = _get_visible_devices()
    return kit_lib.plugin_init(
        global_id,
        group_size,
        re,
        re_seed,
        visible_devices,
        global_batch_size=kwargs['global_batch_size'])
def get_features(tokenizer, sentences, labels):
    """Encode sentences and expose them, with labels, as a generator dataset.

    Each sentence is encoded with `tokenizer.encode_plus` (special tokens
    added, `max_length=tokenizer.max_len`) and padded to `tokenizer.max_len`
    on the side given by `tokenizer.padding_side` ('right' pads at the end,
    anything else pads at the front). The attention mask is 1 for encoded
    positions and 0 for padding.

    Args:
      tokenizer: Tokenizer providing `encode_plus`, `max_len`, `padding_side`,
        `pad_token_id` and `pad_token_type_id`.
      sentences: Iterable of inputs to encode.
      labels: Indexable of labels aligned with `sentences`; each is converted
        with `int()`.

    Returns:
      The object returned by `data.Dataset.from_generator`, yielding
      `({'input_ids', 'attention_mask', 'token_type_ids'}, label)` pairs.

    Raises:
      AssertionError: if a padded sequence does not end up exactly
        `tokenizer.max_len` long (e.g. the encoding was longer than that).
    """
    features = []
    for index, sentence in enumerate(sentences):
        encoded = tokenizer.encode_plus(
            sentence, add_special_tokens=True, max_length=tokenizer.max_len)
        input_ids = encoded['input_ids']
        token_type_ids = encoded['token_type_ids']

        # Build the real-token mask and the three padding runs once, then
        # order them according to the padding side.
        padding_length = tokenizer.max_len - len(input_ids)
        real_mask = [1] * len(input_ids)
        pad_mask = [0] * padding_length
        pad_ids = [tokenizer.pad_token_id] * padding_length
        pad_types = [tokenizer.pad_token_type_id] * padding_length

        if tokenizer.padding_side == 'right':
            attention_mask = real_mask + pad_mask
            input_ids = input_ids + pad_ids
            token_type_ids = token_type_ids + pad_types
        else:
            attention_mask = pad_mask + real_mask
            input_ids = pad_ids + input_ids
            token_type_ids = pad_types + token_type_ids

        assert tokenizer.max_len == len(attention_mask) == len(input_ids) == len(
            token_type_ids), f'{tokenizer.max_len}, {len(attention_mask)}, {len(input_ids)}, {len(token_type_ids)}'

        features.append({
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'token_type_ids': token_type_ids,
            'label': int(labels[index]),
        })

    input_keys = ('input_ids', 'attention_mask', 'token_type_ids')

    def gen():
        for feature in features:
            model_inputs = {key: feature[key] for key in input_keys}
            yield (model_inputs, feature['label'])

    output_types = (
        {
            'input_ids': int32,
            'attention_mask': int32,
            'token_type_ids': int32,
        },
        int64,
    )
    output_shapes = (
        {
            'input_ids': TensorShape([None]),
            'attention_mask': TensorShape([None]),
            'token_type_ids': TensorShape([None]),
        },
        TensorShape([]),
    )
    return data.Dataset.from_generator(gen, output_types, output_shapes)
def _get_test_input_fn(self):
    """
    Build the test input pipeline from `self._yield_test_samples`.

    The generator output is declared as a `(float32, int32)` pair with static
    shapes `(32, 32, 3)` and `(10,)`. The first tensor is wrapped in a feature
    dict keyed by `self.FEATURE_NAME`; the result is batched with
    `self._batch_size` and prefetched with `self._prefetch_size`.

    :return: the resulting dataset object
    """
    image_shape = TensorShape([Dimension(32), Dimension(32), Dimension(3)])
    label_shape = TensorShape(Dimension(10))

    def _as_features_and_label(image, label):
        # Wrap the image in the feature dict expected downstream.
        return {self.FEATURE_NAME: image}, label

    dataset = tf.data.Dataset.from_generator(
        self._yield_test_samples,
        (tf.float32, tf.int32),
        output_shapes=(image_shape, label_shape))
    dataset = (
        dataset
        .map(_as_features_and_label)
        .batch(batch_size=self._batch_size)
        .prefetch(self._prefetch_size)
    )

    print_info("Dataset output sizes are: ")
    print_info(dataset.output_shapes)
    return dataset
def build(self, input_shape):
    """Create the layer's weights for the given input shape.

    Adds a `kernel` weight of shape `[last_dim, units]` and, when
    `self.use_bias` is set, a `bias` weight of shape `[units]`; otherwise
    `self.bias` is None. Also sets `self.input_spec` (at least 2 dims, last
    axis fixed to `last_dim`) and marks the layer as built.

    Args:
      input_shape: Anything accepted by `TensorShape`.

    Raises:
      TypeError: if the layer dtype (or `K.floatx()` when unset) is neither
        floating point nor complex.
      ValueError: if the last dimension of `input_shape` is None.
    """
    dtype = dtypes.as_dtype(self.dtype or K.floatx())
    if not dtype.is_floating and not dtype.is_complex:
        raise TypeError(
            'Unable to build `Dense` layer with non-floating point '
            'dtype %s' % (dtype, ))

    input_shape = TensorShape(input_shape)
    last_dim = input_shape[-1]
    if last_dim is None:
        raise ValueError('The last dimension of the inputs to `Dense` '
                         'should be defined. Found `None`.')

    self.input_spec = InputSpec(min_ndim=2, axes={-1: last_dim})
    self.kernel = self.add_weight(
        'kernel',
        shape=[last_dim, self.units],
        initializer=self.kernel_initializer,
        regularizer=self.kernel_regularizer,
        constraint=self.kernel_constraint,
        dtype=self.dtype,
        trainable=True)

    if not self.use_bias:
        self.bias = None
    else:
        self.bias = self.add_weight(
            'bias',
            shape=[self.units],
            initializer=self.bias_initializer,
            regularizer=self.bias_regularizer,
            constraint=self.bias_constraint,
            dtype=self.dtype,
            trainable=True)

    self.built = True
def convert_data_to_tensor(data, data_spec):
    """
    Convert a python data object to a tensor together with its tensor spec.

    If the object is a dict, its values are converted recursively and two
    dicts with the same keys are returned. `data` and `data_spec` are expected
    to share the same structure; `data_spec` supplies the dtype each leaf is
    converted to.

    Params:
        data: python data object or (nested) dict of them. Any falsy value
            (None, empty container, ...) is treated as "no data".
        data_spec: object with a `dtype` attribute, or a dict of such objects
            keyed like `data`.
    return:
        `(tensor, spec)`, `(dict of tensors, dict of specs)`, or `((), ())`
        when `data` is falsy. The spec shape is the tensor shape with its
        first (batch) dimension removed.
    """
    # "No data" is decided by plain truthiness.
    if not data:
        return (), ()

    if isinstance(data, dict):
        tensors = {}
        specs = {}
        for key, value in data.items():
            tensors[key], specs[key] = convert_data_to_tensor(
                value, data_spec[key])
        return tensors, specs

    # Leaf: cast to the dtype requested by the spec.
    leaf_dtype = data_spec.dtype
    tensor = tf.convert_to_tensor(data, dtype=leaf_dtype)

    # Drop the leading (batch) dimension when describing one element.
    element_shape = TensorShape(tensor.shape.dims[1:])
    return tensor, tf.TensorSpec(element_shape, leaf_dtype)
def compute_output_shape(self, input_shape):
    """Return the output shape for a 4-element `input_shape`.

    Dimensions 1 and 2 are floor-divided by the product of
    `WideResidualNetwork.STRIDES` times `self.pool_size`; dimension 0 is
    passed through and the last dimension becomes
    `WideResidualNetwork.FILTER_SIZES[-1]`.

    Args:
      input_shape: Indexable with at least three entries.

    Returns:
      A `TensorShape` with four dimensions.
    """
    # `np.product` was a deprecated alias removed in NumPy 2.0; `np.prod` is
    # the supported spelling and returns the same value.
    stride_product = np.prod(WideResidualNetwork.STRIDES)
    downscale = stride_product * self.pool_size
    return TensorShape((
        input_shape[0],
        input_shape[1] // downscale,
        input_shape[2] // downscale,
        WideResidualNetwork.FILTER_SIZES[-1],
    ))
def _generate_random_box_md():
    """Create metadata for a randomly sized axis-aligned box.

    Width, height and length are drawn (in that order) with
    `np.random.randint(2, 41)`, i.e. integers from 2 to 40. The bounding box
    holds the eight corners of the box centred at the origin, shifted by 32
    and multiplied by `scale` (0.01).

    Returns:
      dict with keys 'shape', 'augmentation', 'w', 'h', 'l', 'y_angle',
      'x_angle', 'z_angle', 'category', 'id', 'scale' and 'bounding_box'.
    """
    # Keep this draw order; it determines the result under a fixed seed.
    width = np.random.randint(2, 41)
    height = np.random.randint(2, 41)
    length = np.random.randint(2, 41)
    scale = 0.01

    # Corners are enumerated with x over width, y over length, z over height.
    corners = []
    for x in (-width / 2, width / 2):
        for y in (-length / 2, length / 2):
            for z in (-height / 2, height / 2):
                corners.append([x, y, z])
    bounding_box = (np.array(corners) + 32) * scale

    return {
        'shape': TensorShape((64, 64, 64, 1)),
        'augmentation': f'{length}_{width}_{height}',
        'w': width,
        'h': height,
        'l': length,
        'y_angle': 0,
        'x_angle': 0,
        'z_angle': 0,
        'category': 'Axis Aligned Box',
        'id': 'Axis Aligned Box',
        'scale': scale,
        'bounding_box': bounding_box,
    }
def __init__(self,
             f_block_num_layers: int,
             f_block_units: int,
             s_block_num_layers: int,
             s_block_units: int,
             state_shape,
             batch_size: int,
             **kwargs):
    """Store the block configuration and declare the state sizes.

    :param f_block_num_layers: stored as `self.f_block_num_layers`
    :param f_block_units: stored as `self.f_block_units`; also the first
        entry of `self.state_size`
    :param s_block_num_layers: stored as `self.s_block_num_layers`
    :param s_block_units: stored as `self.s_block_units`
    :param state_shape: stored as given; `TensorShape(state_shape)` is the
        second entry of `self.state_size`
    :param batch_size: stored as `self.batch_size`
    :param kwargs: forwarded unchanged to the parent constructor
    """
    # All attributes are assigned before the parent constructor runs.
    self.f_block_num_layers = f_block_num_layers
    self.f_block_units = f_block_units
    self.s_block_num_layers = s_block_num_layers
    self.s_block_units = s_block_units
    self.state_shape = state_shape
    self.batch_size = batch_size

    # NOTE(review): NoDependency presumably keeps the list from being tracked
    # as a checkpoint dependency -- confirm against its definition.
    state_sizes = [f_block_units, TensorShape(state_shape)]
    self.state_size = NoDependency(state_sizes)

    super().__init__(**kwargs)
def tile_concat(values, axis):
    """Tile tensors to a common broadcast shape, then concatenate them.

    `values` is a list of tensors of equal rank. Every dimension except
    `axis` is broadcast to a common size (size-1 dimensions are repeated);
    the `axis` dimension may differ between tensors and is the one the
    results are concatenated along.

    The broadcast shape is computed with `TensorShape` and
    `broadcast_static_shape` (noted in the original as coming from tf); the
    tiling and concatenation use `Tensor.repeat` and `torch.cat`.

    Args:
      values: non-empty list of tensors with the same number of dimensions.
      axis: concatenation axis; a negative value is converted to its
        positive form.

    Returns:
      The concatenated tensor.

    Raises:
      AssertionError: if ranks differ, or a dimension that must be tiled is
        not of size 1.
    """
    shapes = [value.shape for value in values]

    # Convert axis to its positive form.
    ndims = len(shapes[0])
    assert all(ndims == len(shape) for shape in shapes[1:])
    if -ndims < axis < 0:
        axis += ndims

    # Remove the axis dimension; `pop` returns the size at `axis` and also
    # shortens the list in place.
    remaining = [list(shape) for shape in shapes]
    axis_sizes = [dims.pop(axis) for dims in remaining]
    static_shapes = [TensorShape(dims) for dims in remaining]

    # Compute the shape all remaining dimensions broadcast to.
    common = static_shapes[0]
    for static_shape in static_shapes[1:]:
        common = broadcast_static_shape(common, static_shape)

    # Add the axis dimension back, one target shape per input.
    target_shapes = []
    for size in axis_sizes:
        target = list(common)
        target.insert(axis, size)
        target_shapes.append(target)

    def _repeat_factor(dim, target_dim):
        # A matching dimension is left alone; otherwise it must be size 1
        # and is repeated up to the target size.
        if dim == target_dim:
            return 1
        assert dim == 1
        return target_dim

    # Tile each value to its target shape, if necessary.
    tiled = []
    for value, target in zip(values, target_shapes):
        multiples = [
            _repeat_factor(dim, target_dim)
            for dim, target_dim in zip(list(value.shape), target)
        ]
        needs_tiling = any(multiple != 1 for multiple in multiples)
        tiled.append(value.repeat(multiples) if needs_tiling else value)

    return torch.cat(tiled, dim=axis)
def compute_output_shape(self, input_shape):
    """Return the output shape for a 4-element `input_shape`.

    Dimension 0 is passed through, dimensions 1 and 2 are floor-divided by
    `self.stride`, and the last dimension is `self.filters * self.k`.

    Args:
      input_shape: Indexable with at least three entries.

    Returns:
      A `TensorShape` with four dimensions.
    """
    leading = input_shape[0]
    reduced_first = input_shape[1] // self.stride
    reduced_second = input_shape[2] // self.stride
    channels = self.filters * self.k
    return TensorShape((leading, reduced_first, reduced_second, channels))
def compute_output_shape(self, input_shape):
    """Return the two-dimensional output shape.

    Args:
      input_shape: Indexable whose first entry is carried over unchanged.

    Returns:
      `TensorShape((input_shape[0], self.number_of_classes))`.
    """
    leading = input_shape[0]
    output_dims = (leading, self.number_of_classes)
    return TensorShape(output_dims)