def __init__(self,
             make_variant_fn,
             columns,
             output_types,
             output_shapes=None,
             batch_size=None,
             batch_mode='keep_remainder'):
    """Build the dataset's element structure and create its variant tensor.

    Args:
        make_variant_fn: Callable invoked with `columns`, `batch_size` and
            `batch_mode` keyword arguments plus the entries of
            `self._flat_structure`; its result is handed to the parent
            constructor as the variant tensor.
        columns: Stored on `self._columns` and forwarded to
            `make_variant_fn`.
        output_types: Nested structure of output dtypes.
        output_shapes: Nested structure of output shapes. When falsy, a
            structure mirroring `output_types` filled with
            `tf.TensorShape(None)` is used.
        batch_size: Batch size; `None` (or any falsy value) is converted to
            an int64 tensor holding 0.
        batch_mode: Must be a member of `self.batch_modes_supported`.

    Raises:
        ValueError: If `batch_mode` is not in `self.batch_modes_supported`.
    """
    self._columns = columns
    self._structure = structure_lib.convert_legacy_structure(
        output_types,
        output_shapes or nest.map_structure(
            lambda _: tf.TensorShape(None), output_types),
        nest.map_structure(lambda _: tf.Tensor, output_types))
    self._batch_size = tf.convert_to_tensor(
        batch_size or 0, dtype=dtypes.int64, name="batch_size")
    if batch_mode not in self.batch_modes_supported:
        raise ValueError(
            "Unsupported batch_mode: '{}', must be one of {}".format(
                batch_mode, self.batch_modes_supported))
    self._batch_mode = tf.convert_to_tensor(
        batch_mode, dtypes.string, name="batch_mode")
    if batch_size is not None or batch_mode == 'auto':
        # Only 'drop_remainder' passes the concrete batch size to the
        # component specs; every other mode passes None.
        spec_batch_size = batch_size if batch_mode == 'drop_remainder' else None
        # pylint: disable=protected-access
        self._structure = nest.map_structure(
            lambda component_spec: component_spec._batch(spec_batch_size),
            self._structure)
    # NOTE(review): `_flat_structure` is not defined in this block; it is
    # presumably provided by the parent dataset class -- confirm.
    variant_tensor = make_variant_fn(columns=self._columns,
                                     batch_size=self._batch_size,
                                     batch_mode=self._batch_mode,
                                     **self._flat_structure)
    super(ArrowBaseDataset, self).__init__(variant_tensor)
def __init__(self, columns, output_types, output_shapes=None):
    """Record the column selection and output signature, then init the parent.

    Args:
        columns: Stored unchanged on `self._columns`.
        output_types: Nested structure stored on `self._output_types`.
        output_shapes: Stored on `self._output_shapes`. When falsy, a
            structure mirroring `output_types` with
            `tensorflow.TensorShape(None)` in every position is stored
            instead.
    """
    def _unknown_shape(_):
        return tensorflow.TensorShape(None)

    self._columns = columns
    self._output_types = output_types
    if output_shapes:
        self._output_shapes = output_shapes
    else:
        self._output_shapes = nest.map_structure(
            _unknown_shape, self._output_types)
    super(ArrowBaseDataset, self).__init__()
def __init__(self, host, columns, output_types, output_shapes=None):
    """Create an ArrowDataset from an input stream.

    Args:
        host: A `tf.string` tensor or Python string defining the input
            stream. For a socket client, use "<HOST_IP>:<PORT>", for stdin
            use "STDIN".
        columns: A list of column indices to be used in the Dataset
        output_types: Tensor dtypes of the output tensors
        output_shapes: TensorShapes of the output tensors or None to
            infer partial
    """
    def _unknown_shape(_):
        return tensorflow.TensorShape(None)

    self._columns = columns
    self._output_types = output_types
    if output_shapes:
        self._output_shapes = output_shapes
    else:
        self._output_shapes = nest.map_structure(
            _unknown_shape, self._output_types)
    host_tensor = tensorflow.convert_to_tensor(
        host, dtype=dtypes.string, name="host")
    self._host = host_tensor
    # The parent receives the caller's original `output_shapes`, which may
    # still be None.
    super(ArrowStreamDataset, self).__init__(
        columns, output_types, output_shapes)
def __init__(self, filenames, columns, output_types, output_shapes=None):
    """Create an ArrowDataset from one or more Feather file names.

    Args:
        filenames: A `tf.string` tensor, Python list or scalar containing
            files in Arrow Feather format
        columns: A list of column indices to be used in the Dataset
        output_types: Tensor dtypes of the output tensors
        output_shapes: TensorShapes of the output tensors or None to
            infer partial
    """
    def _unknown_shape(_):
        return tensorflow.TensorShape(None)

    self._columns = columns
    self._output_types = output_types
    if output_shapes:
        self._output_shapes = output_shapes
    else:
        self._output_shapes = nest.map_structure(
            _unknown_shape, self._output_types)
    filenames_tensor = tensorflow.convert_to_tensor(
        filenames, dtype=dtypes.string, name="filenames")
    self._filenames = filenames_tensor
    # The parent receives the caller's original `output_shapes`, which may
    # still be None.
    super(ArrowFeatherDataset, self).__init__(
        columns, output_types, output_shapes)
def __init__(self, record_batches, columns, output_types, output_shapes=None):
    """Create an ArrowDataset directly from Arrow record batches.

    This constructor requires pyarrow to be installed.

    Args:
        record_batches: An Arrow record batch or sequence of record batches
        columns: A list of column indices to be used in the Dataset
        output_types: Tensor dtypes of the output tensors
        output_shapes: TensorShapes of the output tensors or None to
            infer partial
    """
    def _unknown_shape(_):
        return tensorflow.TensorShape(None)

    self._columns = columns
    self._output_types = output_types
    if output_shapes:
        self._output_shapes = output_shapes
    else:
        self._output_shapes = nest.map_structure(
            _unknown_shape, self._output_types)

    import pyarrow as pa

    # A lone RecordBatch is wrapped so the code below always sees a sequence.
    if isinstance(record_batches, pa.RecordBatch):
        batches = [record_batches]
    else:
        batches = record_batches
    # Falsy (e.g. empty) input is rejected here; note that this check is
    # skipped when Python runs with -O.
    assert batches

    # Write all batches into an in-memory buffer, using the first batch's
    # schema for the writer.
    sink = io.BytesIO()
    file_writer = pa.RecordBatchFileWriter(sink, batches[0].schema)
    for record_batch in batches:
        file_writer.write_batch(record_batch)
    file_writer.close()

    serialized = sink.getvalue()
    self._serialized_batches = tensorflow.convert_to_tensor(
        serialized, dtype=dtypes.string, name="serialized_batches")
    super(ArrowDataset, self).__init__(columns, output_types, output_shapes)
def step(self, action, state):
    """Run one `aleop.ale` step and update the episode bookkeeping.

    `should_reset`, `max_episode_length`, `level_name`,
    `num_action_repeats`, `aleop` and `preprocess` are not defined in this
    block; they are presumably supplied by an enclosing scope -- confirm.

    Args:
        action: Forwarded to `aleop.ale`.
        state: Structure exposing `episode_return` and `episode_step`;
            every component is replaced by zeros where `should_reset > 0`.

    Returns:
        A pair `(StepOutput, StepOutputInfo)`. The `StepOutput` carries the
        clipped reward, the `done` flag, the preprocessed observation
        wrapped in a list, and the info accumulated through this step. The
        second element is the info to carry forward, with both fields
        zeroed where `done` is true.
    """
    def _zero_if_reset(component):
        return tf.where(should_reset > 0, tf.zeros_like(component), component)

    state = nest.map_structure(_zero_if_reset, state)

    reward, done, obs = aleop.ale(
        action,
        should_reset,
        max_episode_length,
        level_name,
        frameskip_min=num_action_repeats,
        frameskip_max=num_action_repeats)

    reward.set_shape(())
    reward = tf.clip_by_value(reward, -1., 1.)
    done.set_shape(())
    obs.set_shape((210, 160, 3))
    obs = preprocess(obs)

    # Info carried into the next step: zeroed where the episode finished.
    carried_return = tf.where(
        done, tf.zeros(()), state.episode_return + reward)
    carried_step = tf.where(
        done, tf.zeros((), tf.int64), state.episode_step + 1)
    carried_info = StepOutputInfo(
        episode_return=carried_return,
        episode_step=carried_step)

    # Info reported for this step: includes this step's reward, not zeroed.
    reported_info = StepOutputInfo(
        episode_return=state.episode_return + reward,
        episode_step=state.episode_step + 1)

    step_output = StepOutput(
        reward=reward,
        info=reported_info,
        done=done,
        observation=[obs])
    return step_output, carried_info
def preprocess(x):
    """Convert images to float32, join them on the last axis, and rescale.

    Args:
        x: A nested structure of images.

    Returns:
        A single tensor: every image converted with
        `tf.image.convert_image_dtype(..., tf.float32)`, concatenated along
        axis -1, then mapped through `(v - 0.5) * 2.0`.
    """
    def _as_float(image):
        return tf.image.convert_image_dtype(image, tf.float32)

    float_images = nest.map_structure(_as_float, x)
    image_list = nest.flatten(float_images)
    joined = tf.concat(image_list, axis=-1)
    centered = tf.image.convert_image_dtype(joined, tf.float32) - 0.5
    return centered * 2.0
def call(self, observations, step_type=(), network_state=()):
    """Encode observations and project them to actions.

    Args:
        observations: Nested observations; outer dimensions are flattened
            with `BatchSquash` before encoding.
        step_type: Forwarded to `self._encoder`.
        network_state: Forwarded to `self._encoder`.

    Returns:
        A pair `(actions, network_state)`: the actions packed into the
        structure of `self._action_spec`, and the network state returned by
        the encoder.
    """
    outer_rank = nest_utils.get_outer_rank(
        observations, self.input_tensor_spec)
    squasher = BatchSquash(outer_rank)

    squashed_observations = nest.map_structure(squasher.flatten, observations)
    encoded, next_network_state = self._encoder(
        squashed_observations,
        step_type=step_type,
        network_state=network_state)

    projected = self._action_projection_layer(encoded)
    scaled = scale_to_spec(projected, self._single_action_spec)
    # Restore the outer dimensions that were flattened above.
    restored = squasher.unflatten(scaled)

    packed_actions = nest.pack_sequence_as(self._action_spec, [restored])
    return packed_actions, next_network_state
def __init__(self, serialized_batches, columns, output_types, output_shapes=None):
    """Create an ArrowDataset from a Tensor of serialized batches.

    This constructor requires pyarrow to be installed.

    Args:
        serialized_batches: A string Tensor as a serialized buffer
            containing Arrow record batches as Arrow file format
        columns: A list of column indices to be used in the Dataset
        output_types: Tensor dtypes of the output tensors
        output_shapes: TensorShapes of the output tensors or None to
            infer partial
    """
    def _unknown_shape(_):
        return tensorflow.TensorShape(None)

    self._serialized_batches = serialized_batches
    self._columns = columns
    self._output_types = output_types
    if output_shapes:
        self._output_shapes = output_shapes
    else:
        self._output_shapes = nest.map_structure(
            _unknown_shape, self._output_types)
    # The parent receives the caller's original `output_shapes`, which may
    # still be None.
    super(ArrowDataset, self).__init__(columns, output_types, output_shapes)
def output_classes(self):
    """Return a structure shaped like `self._output_types` holding `tensorflow.Tensor`."""
    def _tensor_class(_):
        return tensorflow.Tensor

    return nest.map_structure(_tensor_class, self._output_types)
def output_shapes(self):
    """Return a structure shaped like `self._output_types` holding `TensorShape([])`."""
    def _scalar_shape(_):
        return tensorflow.TensorShape([])

    return nest.map_structure(_scalar_shape, self._output_types)
def preprocess(dataset):
    """Repeat, shuffle, batch, reformat and prefetch `dataset`.

    The stages run in that order, sized by `NUM_EPOCHS`, `SHUFFLE_BUFFER`,
    `BATCH_SIZE` and `PREFETCH_BUFFER` respectively.
    """

    def batch_format_fn(element):
        """Reshape a batch's `pixels` and `label` into an `OrderedDict`.

        `pixels` is reshaped to `[-1, 28, 28, 1]` under key `x`, and
        `label` to `[-1, 1]` under key `y`.
        """
        pixels = reshape(element['pixels'], [-1, 28, 28, 1])
        labels = reshape(element['label'], [-1, 1])
        return collections.OrderedDict(x=pixels, y=labels)

    repeated = dataset.repeat(NUM_EPOCHS)
    shuffled = repeated.shuffle(SHUFFLE_BUFFER)
    batched = shuffled.batch(BATCH_SIZE)
    formatted = batched.map(batch_format_fn)
    return formatted.prefetch(PREFETCH_BUFFER)


preprocessed_sample_dataset = preprocess(sample_dataset)

# Take the first preprocessed batch and convert each tensor with `.numpy()`.
sample_batch = nest.map_structure(
    lambda tensor: tensor.numpy(),
    next(iter(preprocessed_sample_dataset)))


def make_federated_data(client_data, client_ids):
    """Return one preprocessed dataset for every id in `client_ids`."""
    return [
        preprocess(client_data.create_tf_dataset_for_client(client_id))
        for client_id in client_ids
    ]


federated_train_data = make_federated_data(
    train_dataset, train_dataset.client_ids)

print('Number of client datasets: {l}'.format(l=len(federated_train_data)))
print('First dataset: {d}'.format(d=federated_train_data[0]))
def cast_and_concat(x):
    """Cast a nested structure and concatenate its leaves on the last axis.

    Every leaf of `x` is passed through
    `training_utils.cast_if_floating_dtype`, the structure is flattened,
    and the resulting list is concatenated along axis -1.
    """
    cast_structure = nest.map_structure(
        training_utils.cast_if_floating_dtype, x)
    components = nest.flatten(cast_structure)
    return tf.concat(components, axis=-1)