Exemplo n.º 1
0
def create_batch(dataset, hparams, is_training, batch_size=None):
    """Group the elements of ``dataset`` into batches.

    Args:
        dataset: Object exposing ``batch`` and ``padded_batch``.
        hparams: Hyperparameter container. ``batch_size``,
            ``max_expected_train_example_len``, ``split_pianoroll`` and
            (for split piano rolls) ``timbre_num_classes`` are read here.
        is_training: Truthy when the batches are meant for training.
        batch_size: Optional override. Any falsy value (``None``, ``0``)
            falls back to ``hparams.batch_size``.

    Returns:
        The batched dataset. Every branch passes ``drop_remainder=True``.
    """
    effective_batch_size = batch_size or hparams.batch_size

    # Training with a configured maximum example length: plain batching,
    # no padding shapes are supplied.
    if hparams.max_expected_train_example_len and is_training:
        return dataset.batch(effective_batch_size, drop_remainder=True)

    if hparams.split_pianoroll:
        # Label tensors carry one extra trailing channel beyond the timbre
        # classes.
        feature_shapes = MultiFeatureTensors(
            TensorShape([None, 229, 1]),
            TensorShape([None, 229, 1]),
            TensorShape([hparams.timbre_num_classes]),
        )
        label_shapes = LabelTensors(
            TensorShape([None, 88, hparams.timbre_num_classes + 1]),
            TensorShape([None, 88, hparams.timbre_num_classes + 1]),
            TensorShape([None, 88, hparams.timbre_num_classes + 1]),
        )
    else:
        feature_shapes = FeatureTensors(TensorShape([None, 229, 1]))
        label_shapes = LabelTensors(
            TensorShape([None, 88]),
            TensorShape([None, 88]),
            TensorShape([None, 88]),
        )

    return dataset.padded_batch(
        effective_batch_size,
        padded_shapes=(feature_shapes, label_shapes),
        drop_remainder=True)
    def _get_test_input_function(self):
        """
        Build the test dataset from ``self._yield_test_samples``.

        The generator output is mapped through ``self.feature_map_func``,
        batched (incomplete batches are dropped), prefetched and finally
        cached in a file under ``self.iterator_dir``.

        :return: the resulting dataset
        """
        hparams = self._hparams

        def _sample_shape(*trailing):
            # (frames_per_sample, neff) followed by any extra trailing dims.
            leading = [
                Dimension(hparams.frames_per_sample),
                Dimension(hparams.neff),
            ]
            return TensorShape(leading + [Dimension(d) for d in trailing])

        cache_file = os.path.join(self.iterator_dir, "test_data_cache")

        dataset = (
            tf.data.Dataset.from_generator(
                self._yield_test_samples,
                (tf.float32, tf.bool, tf.bool),
                output_shapes=(_sample_shape(),
                               _sample_shape(),
                               _sample_shape(2)))
            .map(self.feature_map_func,
                 num_parallel_calls=hparams.num_parallel_calls)
            .batch(batch_size=hparams.batch_size, drop_remainder=True)
            .prefetch(hparams.prefetch_size)
            .cache(filename=cache_file))

        print_info("Dataset output sizes are: ")
        print_info(dataset.output_shapes)
        return dataset
Exemplo n.º 3
0
    def _get_test_input_function(self):
        """
        Build the test dataset from ``self._yield_test_samples``.

        Each generated triple is repacked as ``(features, label)`` where
        ``features`` is a dict keyed by ``self.FEATURE_1_NAME`` and
        ``self.FEATURE_2_NAME``; the result is batched (incomplete batches
        are dropped) and prefetched.

        :return: the resulting dataset
        """
        hparams = self._hparams

        def _sample_shape(*trailing):
            # (frames_per_sample, neff) followed by any extra trailing dims.
            leading = [
                Dimension(hparams.frames_per_sample),
                Dimension(hparams.neff),
            ]
            return TensorShape(leading + [Dimension(d) for d in trailing])

        def _to_features_and_label(x, y, z):
            # First two generator outputs become the feature dict, the
            # third one is the label.
            return {self.FEATURE_1_NAME: x, self.FEATURE_2_NAME: y}, z

        dataset = (
            tf.data.Dataset.from_generator(
                self._yield_test_samples,
                (tf.float32, tf.bool, tf.bool),
                output_shapes=(_sample_shape(),
                               _sample_shape(),
                               _sample_shape(2)))
            .map(_to_features_and_label)
            .batch(batch_size=hparams.batch_size, drop_remainder=True)
            .prefetch(hparams.prefetch_size))
        # dataset = dataset.cache(filename=os.path.join(self.iterator_dir, "test_data_cache"))

        print_info("Dataset output sizes are: ")
        print_info(dataset.output_shapes)
        return dataset
    def _user_resize_func(self, sample, vad, label):
        """
        Give the three tensors static shapes and pack them as
        ``(features, label)``. Per the original note, this is meant to run
        after a `tf.py_func` call.

        :param sample: reshaped to (dummy_slicing_dim, neff)
        :param vad: reshaped to (dummy_slicing_dim, neff)
        :param label: reshaped to (dummy_slicing_dim, neff, 2)
        :return: ({FEATURE_1_NAME: sample, FEATURE_2_NAME: vad}, label)
        """
        hparams = self._hparams

        def _static_shape(*trailing):
            # (dummy_slicing_dim, neff) followed by any extra trailing dims.
            leading = [
                Dimension(hparams.dummy_slicing_dim),
                Dimension(hparams.neff),
            ]
            return TensorShape(leading + [Dimension(d) for d in trailing])

        sample = tf.reshape(sample, shape=_static_shape())
        vad = tf.reshape(vad, shape=_static_shape())
        label = tf.reshape(label, shape=_static_shape(2))

        features = {self.FEATURE_1_NAME: sample, self.FEATURE_2_NAME: vad}
        return (features, label)
Exemplo n.º 5
0
    def _multi_worker_init(**kwargs):
        """Initialise the plugin on one replica of a multi-worker group.

        Replica 0 obtains an NCCL unique id and a seed and broadcasts both
        through collective ops; every other replica receives them. The
        received values are then handed to ``kit_lib.plugin_init``.

        Keyword Args:
            seed: Optional seed. On replica 0 a falsy value is replaced by
                ``kit_lib.gen_random_seed()``; on other replicas it is only
                compared against the broadcast seed to emit a warning.
            global_batch_size: Required; read with ``kwargs[...]``, so a
                missing key raises ``KeyError``.

        Returns:
            Whatever ``kit_lib.plugin_init`` returns.
        """
        replica_ctx = get_replica_context()
        global_id = replica_ctx.replica_id_in_sync_group

        # Step 1: share the NCCL unique id (declared as shape [32], int32).
        # Both sides use group_key=1 / instance_key=2 so the send and the
        # receives pair up.
        # NOTE(review): the local name `re` would shadow the stdlib `re`
        # module inside this function if that module is imported.
        if global_id == 0:
            unique_id = kit_lib.get_nccl_unique_id()
            re = collective_ops.broadcast_send(
                unique_id,
                TensorShape([
                    32,
                ]),
                int32,
                group_size=replica_ctx.num_replicas_in_sync,
                group_key=1,
                instance_key=2)
        else:
            re = collective_ops.broadcast_recv(
                TensorShape([
                    32,
                ]),
                int32,
                group_size=replica_ctx.num_replicas_in_sync,
                group_key=1,
                instance_key=2)

        # Step 2: share the seed (declared as shape [1], int64) under
        # instance_key=3.
        if global_id == 0:
            # NOTE(review): because of `or`, an explicit seed of 0 is treated
            # as "not provided" and replaced by a generated one.
            global_seed = kwargs.get("seed", None) or kit_lib.gen_random_seed()
            re_seed = collective_ops.broadcast_send(
                global_seed,
                TensorShape([
                    1,
                ]),
                int64,
                group_size=replica_ctx.num_replicas_in_sync,
                group_key=1,
                instance_key=3)
        else:
            global_seed = kwargs.get("seed", None)
            re_seed = collective_ops.broadcast_recv(
                TensorShape([
                    1,
                ]),
                int64,
                group_size=replica_ctx.num_replicas_in_sync,
                group_key=1,
                instance_key=3)

            # Only warn: the broadcast seed (re_seed) is what gets passed to
            # plugin_init below, regardless of the locally supplied one.
            if (global_seed and global_seed != re_seed):
                logging.warning(
                    "The seed: {} is not consistent with that from cheif-node: {}, "
                    "and the seed from cheif-node will be used.".format(
                        global_seed, re_seed))

        # Step 3: initialise the plugin with the shared id and seed.
        visible_devices = _get_visible_devices()
        status = kit_lib.plugin_init(
            global_id,
            replica_ctx.num_replicas_in_sync,
            re,
            re_seed,
            visible_devices,
            global_batch_size=kwargs['global_batch_size'])
        return status
Exemplo n.º 6
0
def get_features(tokenizer, sentences, labels):
    """Encode sentences and expose them, with their labels, as a dataset.

    Every sentence is encoded with ``tokenizer.encode_plus`` and padded up
    to ``tokenizer.max_len`` on the side named by
    ``tokenizer.padding_side``; an attention mask marks real tokens with 1
    and padding with 0.

    Args:
        tokenizer: Provides ``encode_plus``, ``max_len``, ``padding_side``,
            ``pad_token_id`` and ``pad_token_type_id``.
        sentences: Iterable of inputs accepted by ``encode_plus``.
        labels: Indexed by position (``labels[i]``); each entry is passed
            through ``int``.

    Returns:
        The result of ``data.Dataset.from_generator`` over
        ``(inputs_dict, label)`` pairs.
    """
    records = []
    for index, sentence in enumerate(sentences):
        encoded = tokenizer.encode_plus(
            sentence, add_special_tokens=True, max_length=tokenizer.max_len)
        input_ids = encoded['input_ids']
        token_type_ids = encoded['token_type_ids']

        real_mask = [1] * len(input_ids)
        pad_count = tokenizer.max_len - len(input_ids)
        pad_mask = [0] * pad_count
        pad_ids = [tokenizer.pad_token_id] * pad_count
        pad_type_ids = [tokenizer.pad_token_type_id] * pad_count

        if tokenizer.padding_side == 'right':
            attention_mask = real_mask + pad_mask
            input_ids = input_ids + pad_ids
            token_type_ids = token_type_ids + pad_type_ids
        else:
            attention_mask = pad_mask + real_mask
            input_ids = pad_ids + input_ids
            token_type_ids = pad_type_ids + token_type_ids

        # All three sequences must end up exactly max_len long.
        assert tokenizer.max_len == len(attention_mask) == len(input_ids) == len(
            token_type_ids), f'{tokenizer.max_len}, {len(attention_mask)}, {len(input_ids)}, {len(token_type_ids)}'

        records.append({
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'token_type_ids': token_type_ids,
            'label': int(labels[index]),
        })

    input_keys = ('input_ids', 'attention_mask', 'token_type_ids')

    def gen():
        # A fresh inputs dict is built for every yielded example.
        for record in records:
            yield {key: record[key] for key in input_keys}, record['label']

    output_types = (
        {
            'input_ids': int32,
            'attention_mask': int32,
            'token_type_ids': int32,
        },
        int64,
    )
    output_shapes = (
        {
            'input_ids': TensorShape([None]),
            'attention_mask': TensorShape([None]),
            'token_type_ids': TensorShape([None]),
        },
        TensorShape([]),
    )
    return data.Dataset.from_generator(gen, output_types, output_shapes)
    def _get_test_input_fn(self):
        """
        Build the test dataset from ``self._yield_test_samples``.

        Each ``(image, label)`` pair is repacked as
        ``({FEATURE_NAME: image}, label)``, then batched and prefetched.

        :return: the resulting dataset
        """
        image_shape = TensorShape([Dimension(32), Dimension(32), Dimension(3)])
        label_shape = TensorShape(Dimension(10))

        def _to_features_and_label(image, label):
            return {self.FEATURE_NAME: image}, label

        dataset = (
            tf.data.Dataset.from_generator(
                self._yield_test_samples,
                (tf.float32, tf.int32),
                output_shapes=(image_shape, label_shape))
            .map(_to_features_and_label)
            .batch(batch_size=self._batch_size)
            .prefetch(self._prefetch_size))

        print_info("Dataset output sizes are: ")
        print_info(dataset.output_shapes)
        return dataset
Exemplo n.º 8
0
 def build(self, input_shape):
     """Create the ``kernel`` (and optional ``bias``) weights.

     Raises:
         TypeError: if the layer dtype is neither floating nor complex.
         ValueError: if the last entry of ``input_shape`` is ``None``.
     """
     resolved_dtype = dtypes.as_dtype(self.dtype or K.floatx())
     supported = resolved_dtype.is_floating or resolved_dtype.is_complex
     if not supported:
         raise TypeError(
             'Unable to build `Dense` layer with non-floating point '
             'dtype %s' % (resolved_dtype, ))

     last_dim = TensorShape(input_shape)[-1]
     if last_dim is None:
         raise ValueError('The last dimension of the inputs to `Dense` '
                          'should be defined. Found `None`.')

     # Pin the size of the last axis for future inputs.
     self.input_spec = InputSpec(min_ndim=2, axes={-1: last_dim})

     self.kernel = self.add_weight(
         'kernel',
         shape=[last_dim, self.units],
         initializer=self.kernel_initializer,
         regularizer=self.kernel_regularizer,
         constraint=self.kernel_constraint,
         dtype=self.dtype,
         trainable=True)

     if not self.use_bias:
         self.bias = None
     else:
         self.bias = self.add_weight(
             'bias',
             shape=[self.units, ],
             initializer=self.bias_initializer,
             regularizer=self.bias_regularizer,
             constraint=self.bias_constraint,
             dtype=self.dtype,
             trainable=True)

     self.built = True
Exemplo n.º 9
0
def _is_empty_data(value):
    """Return True for ``None`` and for sized objects of length zero."""
    if value is None:
        return True
    try:
        return len(value) == 0
    except TypeError:
        # Unsized objects (plain scalars, 0-d arrays, ...) are real data.
        return False


def convert_data_to_tensor(data, data_spec):
    """Convert a Python data object to a tensor and derive its spec.

    If ``data`` is a dict, its values are converted recursively instead.
    ``data_spec`` must have the same structure as ``data`` and supplies,
    through its ``dtype`` attribute, the TensorFlow type each leaf is cast
    to.

    Emptiness is decided explicitly (``None`` or length zero) rather than
    by truthiness, so zero-valued scalars are converted like any other
    value and multi-element arrays do not fail on an ambiguous truth test.

    Params:
        data: ``None``, a dict of nested data, or a value accepted by
            ``tf.convert_to_tensor``.
        data_spec: spec (or dict of specs) mirroring ``data``.
    return:
        A ``(tensors, specs)`` pair: ``((), ())`` for empty input, a pair
        of dicts for dict input, otherwise ``(Tensor, TensorSpec)`` where
        the spec shape omits the leading dimension of the tensor.
    """
    if _is_empty_data(data):
        return (), ()

    if isinstance(data, dict):
        tensors = {}
        specs = {}
        # Distinct loop names: the original rebound ``data`` inside the loop.
        for key, value in data.items():
            tensors[key], specs[key] = convert_data_to_tensor(
                value, data_spec[key])
        return tensors, specs

    # Cast the leaf to the data type its spec expects.
    t_type = data_spec.dtype
    tensor_data = tf.convert_to_tensor(data, dtype=t_type)

    # Build the spec from the tensor shape without its first (batch) dim.
    t_shape = TensorShape(tensor_data.shape.dims[1:])
    t_spec = tf.TensorSpec(t_shape, t_type)
    return tensor_data, t_spec
Exemplo n.º 10
0
Arquivo: res.py Projeto: MilesGrey/emd
 def compute_output_shape(self, input_shape):
     """Return the output shape for ``input_shape``.

     Entries 1 and 2 of ``input_shape`` are floor-divided by the product
     of ``WideResidualNetwork.STRIDES`` times ``self.pool_size``; entry 0
     is passed through and the last entry becomes
     ``WideResidualNetwork.FILTER_SIZES[-1]``.
     """
     # np.prod replaces np.product: the alias was deprecated in NumPy 1.25
     # and removed in NumPy 2.0.
     reduction = np.prod(WideResidualNetwork.STRIDES) * self.pool_size
     return TensorShape(
         (input_shape[0],
          input_shape[1] // reduction,
          input_shape[2] // reduction,
          WideResidualNetwork.FILTER_SIZES[-1]))
Exemplo n.º 11
0
    def _generate_random_box_md():
        """Return a metadata dict for a randomly sized axis-aligned box.

        Width, height and length are each drawn with
        ``np.random.randint(2, 41)``, in that order. The bounding box is
        the set of eight corners, shifted by 32 and multiplied by the scale.
        """
        # Draw order (w, h, l) is kept so results match under a fixed seed.
        width = np.random.randint(2, 41)
        height = np.random.randint(2, 41)
        length = np.random.randint(2, 41)
        scale = 0.01

        # Corners enumerate x over width, y over length, z over height.
        corners = []
        for x in (-width / 2, width / 2):
            for y in (-length / 2, length / 2):
                for z in (-height / 2, height / 2):
                    corners.append([x, y, z])
        bounding_box = (np.array(corners) + 32) * scale

        return {
            'shape': TensorShape((64, 64, 64, 1)),
            'augmentation': f'{length}_{width}_{height}',
            'w': width,
            'h': height,
            'l': length,
            'y_angle': 0,
            'x_angle': 0,
            'z_angle': 0,
            'category': 'Axis Aligned Box',
            'id': 'Axis Aligned Box',
            'scale': scale,
            'bounding_box': bounding_box,
        }
Exemplo n.º 12
0
    def __init__(self, f_block_num_layers: int, f_block_units: int, s_block_num_layers: int,
                 s_block_units: int, state_shape, batch_size: int, **kwargs):
        """Store the block configuration and declare the state size.

        ``state_size`` is the two-element list
        ``[f_block_units, TensorShape(state_shape)]`` wrapped in
        ``NoDependency``. All attributes are assigned before the parent
        initializer runs; remaining keyword arguments are forwarded to it.
        """
        # f-block settings
        self.f_block_num_layers = f_block_num_layers
        self.f_block_units = f_block_units

        # s-block settings
        self.s_block_num_layers = s_block_num_layers
        self.s_block_units = s_block_units

        self.batch_size = batch_size
        self.state_shape = state_shape

        state_entries = [f_block_units, TensorShape(state_shape)]
        self.state_size = NoDependency(state_entries)

        super().__init__(**kwargs)
Exemplo n.º 13
0
def tile_concat(values, axis):
    """Tile tensors to a common broadcast shape and concatenate on ``axis``.

    ``values`` is a list of tensors. Following broadcasting rules, every
    dimension other than ``axis`` is tiled until all tensors agree; the
    ``axis`` dimension itself may differ between tensors.

    Returns the result of ``torch.cat`` over the (possibly tiled) tensors.
    """
    original_shapes = [v.shape for v in values]

    # Convert axis to its non-negative form.
    rank = len(original_shapes[0])
    for other in original_shapes[1:]:
        assert rank == len(other)
    if -rank < axis < 0:
        axis += rank

    # Split every shape into its extent along `axis` and the remaining
    # dims. list.pop returns the entry at `axis` and removes it in place.
    remaining = [list(s) for s in original_shapes]
    axis_extents = [dims.pop(axis) for dims in remaining]
    reduced_shapes = [TensorShape(dims) for dims in remaining]  # TensorShape comes from tf

    # Compute the shape all reduced shapes broadcast to.
    common_shape = reduced_shapes[0]
    for reduced in reduced_shapes[1:]:
        common_shape = broadcast_static_shape(common_shape, reduced)  # comes from tf

    # Put each tensor's own axis extent back into the broadcast shape.
    target_shapes = []
    for extent in axis_extents:
        target = list(common_shape)
        target.insert(axis, extent)
        target_shapes.append(target)

    # Tile each value up to its target shape, if necessary.
    tiled_values = []
    for value, target in zip(values, target_shapes):
        multiples = []
        for have, want in zip(list(value.shape), target):
            if have == want:
                multiples.append(1)
            else:
                # Only size-1 dimensions may be expanded.
                assert have == 1
                multiples.append(want)
        needs_tiling = any(multiple != 1 for multiple in multiples)
        tiled_values.append(value.repeat(multiples) if needs_tiling else value)

    return torch.cat(tiled_values, dim=axis)
Exemplo n.º 14
0
Arquivo: res.py Projeto: MilesGrey/emd
 def compute_output_shape(self, input_shape):
     """Return the output shape for ``input_shape``.

     Entry 0 is passed through, entries 1 and 2 are floor-divided by
     ``self.stride`` and the last entry is ``self.filters * self.k``.
     """
     first = input_shape[0]
     second = input_shape[1] // self.stride
     third = input_shape[2] // self.stride
     last = self.filters * self.k
     return TensorShape((first, second, third, last))
Exemplo n.º 15
0
 def compute_output_shape(self, input_shape):
     """Return ``(input_shape[0], self.number_of_classes)`` as a TensorShape."""
     output_dims = (input_shape[0], self.number_of_classes)
     return TensorShape(output_dims)