def one_step(self, new_chain_state, current_reducer_state,
             previous_kernel_results):  # pylint: disable=unused-argument
    """Advance progress bar by one result.

    All arguments are ignored.

    Args:
      new_chain_state: A (possibly nested) structure of incoming chain state(s)
        with shape and dtype compatible with those used to initialize the
        `TracingState`.
      current_reducer_state: `TracingState`s representing all previously traced
        results.
      previous_kernel_results: A (possibly nested) structure of `Tensor`s
        representing internal calculations made in a related
        `TransitionKernel`.

    Returns:
      new_reducer_state: empty list.
    """
    def update_bar():
        try:
            next(self.bar)
        except StopIteration:
            pass
    tf.py_function(update_bar, (), ())
    return []
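A self-contained sketch of the same pattern (an illustration under assumptions, not the original implementation): tf.py_function lets graph-traced code advance a Python-side tqdm progress bar, swallowing StopIteration just as one_step does above.

import tensorflow as tf
import tqdm

bar = iter(tqdm.tqdm(range(100)))  # assumed stand-in for self.bar

def _advance_bar():
    try:
        next(bar)
    except StopIteration:
        pass

@tf.function
def one_training_step():
    # Real per-step work would go here; the py_function only drives the bar.
    tf.py_function(_advance_bar, (), ())

for _ in range(100):
    one_training_step()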
def __init__(self):
    """
    *********************************************
    *****************Constructor*****************
    *********************************************
    """
    # 👇 contains the ted_hrlr_translate/pt_to_en tf.data.Dataset
    # train split, loaded as_supervised
    self.data_train = tfds.load('ted_hrlr_translate/pt_to_en',
                                split='train', as_supervised=True)
    # 👇 contains the ted_hrlr_translate/pt_to_en tf.data.Dataset
    # validation split, loaded as_supervised
    self.data_valid = tfds.load('ted_hrlr_translate/pt_to_en',
                                split='validation', as_supervised=True)
    pt, en = self.tokenize_dataset(self.data_train)
    # the Portuguese tokenizer created from the training set
    self.tokenizer_pt = pt
    # the English tokenizer created from the training set
    self.tokenizer_en = en
    self.data_train = self.data_train.map(lambda x, y: tf.py_function(
        self.tf_encode, [x, y], (tf.int64, tf.int64)))
    self.data_valid = self.data_valid.map(lambda x, y: tf.py_function(
        self.tf_encode, [x, y], (tf.int64, tf.int64)))
def __init__(self, batch_size, max_len):
    """Loads, tokenizes, encodes, filters by max_len and batches the
    ted_hrlr_translate/pt_to_en train and validation splits."""
    self.data_train = tfds.load('ted_hrlr_translate/pt_to_en',
                                split='train', as_supervised=True)
    self.data_valid = tfds.load('ted_hrlr_translate/pt_to_en',
                                split='validation', as_supervised=True)
    pt, en = self.tokenize_dataset(self.data_train)
    self.tokenizer_pt = pt
    self.tokenizer_en = en
    self.data_train = self.data_train.map(lambda x, y: tf.py_function(
        self.tf_encode, [x, y], (tf.int64, tf.int64)))
    self.data_valid = self.data_valid.map(lambda x, y: tf.py_function(
        self.tf_encode, [x, y], (tf.int64, tf.int64)))
    self.data_train = self.data_train.filter(
        lambda x, y: len(x) <= max_len and len(y) <= max_len)
    self.data_valid = self.data_valid.filter(
        lambda x, y: len(x) <= max_len and len(y) <= max_len)
    self.data_train = self.data_train.cache().shuffle(10000000)
    self.data_train = self.data_train.padded_batch(batch_size,
                                                   ([None], [None]))
    self.data_train = self.data_train.prefetch(
        tf.data.experimental.AUTOTUNE)
    self.data_valid = self.data_valid.padded_batch(batch_size,
                                                   ([None], [None]))
def shard_train_batch(images, labels):
    """Converts 4D input into 5D input where first dimension denotes cores."""
    boxes = labels[ssd_constants.BOXES]
    classes = labels[ssd_constants.CLASSES]
    num_matched_boxes = labels[ssd_constants.NUM_MATCHED_BOXES]
    local_num_replicas = params['local_num_replicas']
    images = split_a_tensor_or_a_dict(images, local_num_replicas)
    boxes = split_a_tensor_or_a_dict(boxes, local_num_replicas)
    classes = split_a_tensor_or_a_dict(classes, local_num_replicas)
    num_matched_boxes = split_a_tensor_or_a_dict(
        tf.reshape(num_matched_boxes, [-1]), local_num_replicas)

    if params['conv0_space_to_depth']:

        def _space_to_depth_training_fn(images, labels):
            images = fused_transpose_and_space_to_depth(
                images,
                block_size=ssd_constants.SPACE_TO_DEPTH_BLOCK_SIZE,
                transpose_input=transpose_input)
            if transpose_input:
                labels = tf.transpose(labels, [0, 2, 3, 1])
            images = pad_images_if_uneven(images)
            return images, labels

        images, boxes = _space_to_depth_training_fn(images, boxes)
    elif transpose_input:
        # numpy's 5D transpose is faster than tf's 5D transpose.
        # pylint: disable=protected-access
        def np_transpose_bs_gt_8(x):
            return tf.convert_to_tensor(x._numpy().transpose([0, 2, 3, 4, 1]))

        def np_transpose_bs_le_8(x):
            return tf.convert_to_tensor(x._numpy().transpose([0, 2, 3, 1, 4]))
        # pylint: enable=protected-access

        if host_batch_size // params['local_num_replicas'] > 8:
            images = tf.py_function(np_transpose_bs_gt_8, [images],
                                    Tout=images.dtype)
        else:
            images = tf.py_function(np_transpose_bs_le_8, [images],
                                    Tout=images.dtype)
        # Use tf transpose on the 4D boxes tensor.
        boxes = tf.transpose(boxes, [0, 2, 3, 1])
    return (images, {
        ssd_constants.BOXES: boxes,
        ssd_constants.CLASSES: classes,
        ssd_constants.NUM_MATCHED_BOXES: num_matched_boxes
    })
def parse_and_select_from_tfrecord2(self, raw_proto):
    """Dataset map function that parses a TFRecord example and selects fields."""
    # https://stackoverflow.com/questions/41951433/tensorflow-valueerror-shape-must-be-rank-1-but-is-rank-0-for-parseexample-pa
    parsed_features = tf.io.parse_example([raw_proto], self.features)

    self._in1_preprocessors = self.preprocess_list(self.in1_fields,
                                                   self.frame_rate)
    # pylint: disable=g-complex-comprehension
    in_data = tf.concat([
        tf.py_function(
            pp.process, inp=[parsed_features[pp.name]], Tout=tf.float32)
        for pp in self._in1_preprocessors
    ], axis=1)
    in_data = tf.reshape(in_data, (-1,), name='input1_reshape')

    if self.in2_fields:
        self._in2_preprocessors = self.preprocess_list(self.in2_fields,
                                                       self.frame_rate)
        # pylint: disable=g-complex-comprehension
        in2_data = tf.concat([
            tf.py_function(
                pp.process, inp=[parsed_features[pp.name]], Tout=tf.float32)
            for pp in self._in2_preprocessors
        ], axis=1)
        in2_data = tf.reshape(in2_data, (-1,), name='input2_reshape')
    else:
        in2_data = in_data[0:1]

    self._out_preprocessors = self.preprocess_list([self.out_field],
                                                   self.frame_rate)
    # pylint: disable=g-complex-comprehension
    out_data = tf.concat([
        tf.py_function(
            pp.process, inp=[parsed_features[pp.name]], Tout=tf.float32)
        for pp in self._out_preprocessors
    ], axis=1)
    out_data = tf.reshape(out_data, (-1,), name='output_reshape')

    if self.attended_direction:
        attended_data = parsed_features[self.attended_direction]
        # The reshape target must be a rank-1 shape, hence the 1-tuple.
        attended_data = tf.reshape(attended_data, (-1,),
                                   name='attended_reshape')
    else:
        attended_data = None
    return in_data, in2_data, out_data, attended_data
def tf_encode(x):
    src, tgt = x['inputs'], x['targets']
    result_src, result_tgt = tf.py_function(encode, [src, tgt],
                                            [tf.int64, tf.int64])
    result_src.set_shape([None])
    result_tgt.set_shape([None])
    return {'inputs': result_src, 'targets': result_tgt}
def tf_encode(self, pt, en):
    """acts as a tensorflow wrapper for the encode instance method"""
    pt_wrap, en_wrap = tf.py_function(
        self.encode, [pt, en], [tf.int64, tf.int64])
    pt_wrap.set_shape([None])
    en_wrap.set_shape([None])
    return pt_wrap, en_wrap
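A brief usage sketch (an assumption, not taken from any snippet above): a wrapper like tf_encode is handed to Dataset.map, and the set_shape([None]) calls matter because tf.py_function outputs carry no static shape information, which padded_batch later relies on.

# Hedged sketch; `raw_pairs` is a hypothetical tf.data.Dataset of (pt, en)
# tf.string sentence pairs and `ds` a hypothetical object owning tf_encode.
encoded = raw_pairs.map(ds.tf_encode,
                        num_parallel_calls=tf.data.experimental.AUTOTUNE)
batched = encoded.padded_batch(32, padded_shapes=([None], [None]))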
def shard_batch(batch):
    """Shards batch for local devices."""
    images, labels = batch['image'], batch['label']
    local_device_count = jax.local_device_count()
    batch_size = tf.shape(images)[0]
    if batch_size % local_device_count != 0:
        # We don't need all hosts to have the same batch size but (for now)
        # we need all devices on each host to have the same batch size.
        clipped_batch = (batch_size // local_device_count) * local_device_count
        images = images[:clipped_batch]
        labels = labels[:clipped_batch]
    if space_to_depth:
        images = tf.reshape(images, [
            local_device_count, -1, image_size // 2, 2, image_size // 2, 2 * 3
        ])
        images = tf.transpose(images, [0, 1, 2, 4, 3, 5])
        images = tf.reshape(images, [
            local_device_count, -1, image_size // 2, image_size // 2, 4 * 3
        ])
    else:
        images = tf.reshape(
            images, [local_device_count, -1, image_size, image_size, 3])
    if transpose_images:
        def numpy_transpose(x):
            return tf.convert_to_tensor(
                x._numpy().transpose([0, 2, 3, 4, 1]))  # pylint: disable=protected-access
        images = tf.py_function(numpy_transpose, [images], dtype)
        # apparently [0, 2, 3, 1, 4] is better for per-device batch <= 8?
    labels = tf.reshape(labels, [local_device_count, -1])
    batch.update(image=images, label=labels)
    return batch
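An illustrative shape walk-through of the reshape step above (all numbers are assumptions, not values from the source): with 8 local devices and a host batch of 64 RGB images of size 32, the per-device dimension becomes the leading axis.

import tensorflow as tf

# Hedged sketch reproducing only the no-space-to-depth reshape of shard_batch.
images = tf.zeros([64, 32, 32, 3])
labels = tf.zeros([64], dtype=tf.int32)
images = tf.reshape(images, [8, -1, 32, 32, 3])  # -> (8, 8, 32, 32, 3)
labels = tf.reshape(labels, [8, -1])             # -> (8, 8)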
def augment(self, image, label):
    if self.augmentation:
        image = tf.image.random_flip_left_right(image)
        image = tf.py_function(self.random_shift, inp=[image], Tout=self.dtype)
    return image, label
def tf_encode(self, pt, en):
    """Wraps the encode instance method as a TensorFlow operation."""
    result_pt, result_en = tf.py_function(self.encode, [pt, en],
                                          [tf.int64, tf.int64])
    result_pt.set_shape([None])
    result_en.set_shape([None])
    return result_pt, result_en
def tf_encode(self, pt, en):
    """
    acts as a tensorflow wrapper for the encode instance method
    """
    result_pt, result_en = tf.py_function(self.encode, [pt, en],
                                          [tf.int64, tf.int64])
    result_pt.set_shape([None])
    result_en.set_shape([None])
    return result_pt, result_en
def tf_encode(self, pt, en):
    """
    TensorFlow wrapper for the encode instance method; returns the encoded
    pt and en tensors with their shapes set
    """
    result_pt, result_en = tf.py_function(self.encode, [pt, en],
                                          [tf.int64, tf.int64])
    result_pt.set_shape([None])
    result_en.set_shape([None])
    return result_pt, result_en
def load_linear_probe_train_data(model, layer_idx, input_shape, batch_size,
                                 data_path=None):
    """Loads train data for linear probe experiments."""
    buffer_size = 50000
    if 'tiny' in data_path:
        train_dataset = tfds.load(name='cifar10', split='train[:6%]',
                                  as_supervised=True)
    else:
        train_dataset = tfds.load(name='cifar10', split='train',
                                  as_supervised=True)
    if 'tiny' in data_path:
        buffer_size //= 16
    train_dataset = train_dataset.shuffle(buffer_size=buffer_size)
    processing_fn = lambda x, y: tf.py_function(
        inp=(x, y),
        func=functools.partial(preprocess_linear_probe_data,
                               model=model,
                               layer_idx=layer_idx,
                               is_training=True,
                               pooling=FLAGS.pooling),
        Tout=[tf.float32, tf.int64])
    train_dataset = train_dataset.map(processing_fn)
    train_dataset = train_dataset.batch(batch_size, drop_remainder=True)
    train_dataset = train_dataset.prefetch(tf.data.experimental.AUTOTUNE)
    return train_dataset
def pitch_shift(audio, semitones):
    def librosa_pitch_shift(x, semitones):
        return librosa.effects.pitch_shift(
            x.numpy(), SAMPLE_RATE_HZ, n_steps=semitones)

    return tf.py_function(
        func=librosa_pitch_shift, inp=[audio, semitones], Tout=tf.float32)
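A hedged usage sketch (not from the original source): because the librosa call runs eagerly inside tf.py_function, the returned tensor has no static shape, so a map function would typically restore it before batching; `audio_ds` and the shift amount are assumptions.

def _augment(audio):
    shifted = pitch_shift(audio, 2.0)  # hypothetical fixed shift of 2 semitones
    shifted.set_shape([None])          # tf.py_function drops static shape info
    return shifted

# audio_ds = audio_ds.map(_augment,
#                         num_parallel_calls=tf.data.experimental.AUTOTUNE)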
def tf_encode(self, pt, en):
    """
    Tensorflow wrapper for the encode instance method
    """
    tf_pt, tf_en = tf.py_function(self.encode, inp=[pt, en],
                                  Tout=[tf.int64, tf.int64])
    tf_pt.set_shape([None])
    tf_en.set_shape([None])
    return tf_pt, tf_en
def tf_encode(self, pt, en):
    """TensorFlow wrapper for the encode instance method."""
    pt_encoded, en_encoded = tf.py_function(func=self.encode,
                                            inp=[pt, en],
                                            Tout=[tf.int64, tf.int64])
    pt_encoded.set_shape([None])
    en_encoded.set_shape([None])
    return pt_encoded, en_encoded
def tf_encode(self, pt, en):
    """A wrapper for encode"""
    res_pt, res_en = tf.py_function(self.encode, [pt, en],
                                    [tf.int64, tf.int64])
    res_pt.set_shape([None])
    res_en.set_shape([None])
    return res_pt, res_en
def _decode_tf(self, ids):
    """Decode in TensorFlow.

    Args:
      ids: a 1d tf.Tensor with dtype tf.int32

    Returns:
      a tf Scalar with dtype tf.string
    """
    return tf.py_function(func=self.decode, inp=[ids], Tout=tf.string)
def __init__(self):
    self.data_train = tfds.load('ted_hrlr_translate/pt_to_en',
                                split='train', as_supervised=True)
    self.data_valid = tfds.load('ted_hrlr_translate/pt_to_en',
                                split='validation', as_supervised=True)
    pt, en = self.tokenize_dataset(self.data_train)
    self.tokenizer_pt = pt
    self.tokenizer_en = en
    self.data_train = self.data_train.map(
        lambda x, y: tf.py_function(self.tf_encode, [x, y],
                                    (tf.int64, tf.int64)))
    self.data_valid = self.data_valid.map(
        lambda x, y: tf.py_function(self.tf_encode, [x, y],
                                    (tf.int64, tf.int64)))
def include_spectrogram(tensor, hparams=None):
    """Include the spectrogram in our tensor dictionary"""
    spec = tf.py_function(
        functools.partial(create_timbre_spectrogram, hparams=hparams),
        [tensor['audio']], tf.float32)
    return dict(spec=spec,
                note_croppings=tensor['note_croppings'],
                instrument_families=tensor['instrument_families'])
def tf_encode(self, pt, en):
    """
    TensorFlow wrapper around the encode instance method
    """
    pt_tokens, en_tokens = tf.py_function(func=self.encode,
                                          inp=[pt, en],
                                          Tout=[tf.int64, tf.int64])
    pt_tokens.set_shape([None])
    en_tokens.set_shape([None])
    return pt_tokens, en_tokens
def tf_encode(self, pt, en):
    '''acts as a tensorflow wrapper for the encode instance method

    Args:
        pt is a tf.Tensor containing the Portuguese sentence
        en is a tf.Tensor containing the corresponding English sentence

    Returns:
        the pt and en token tensors produced by encode
    '''
    return tf.py_function(self.encode, [pt, en], [tf.int64, tf.int64])
def encode_map_fn(text, label):
    encoded_text, label = tf.py_function(encode,
                                         inp=[text, tf.cast(label, tf.int64)],
                                         Tout=(tf.int64, tf.int64))
    encoded_text.set_shape([None])
    label.set_shape([])
    return encoded_text, label
def tf_encode(self, pt, en):
    """
    TensorFlow wrapper for the encode instance method; sets the shape of the
    pt and en return tensors
    """
    result_pt, result_en = tf.py_function(self.encode, [pt, en],
                                          [tf.int64, tf.int64])
    result_pt.set_shape([None])
    result_en.set_shape([None])
    return (result_pt, result_en)
def tf_encode(self, pt, en):
    """
    TensorFlow wrapper for encode; makes use of tf.py_function
    """
    pt_tok, en_tok = tf.py_function(func=self.encode,
                                    inp=[pt, en],
                                    Tout=[tf.int64, tf.int64])
    pt_tok.set_shape([None])
    en_tok.set_shape([None])
    return pt_tok, en_tok
def tf_encode(self, pt, en):
    '''acts as a tensorflow wrapper for the encode instance method

    Args:
        pt: tf.Tensor containing the Portuguese sentence
        en: tf.Tensor containing the corresponding English sentence
    '''
    result_pt, result_en = tf.py_function(self.encode, [pt, en],
                                          [tf.int64, tf.int64])
    result_pt.set_shape([None])
    result_en.set_shape([None])
    return result_pt, result_en
def tf_encode(self, pt, en):
    """wrapper for the encode instance method

    Sets the shape of the pt and en return tensors
    """
    rslt_pt, rslt_en = tf.py_function(self.encode, [pt, en],
                                      [tf.int64, tf.int64])
    rslt_pt.set_shape([None])
    rslt_en.set_shape([None])
    return rslt_pt, rslt_en
def __init__(self, batch_size, max_len):
    """
    *********************************************
    *****************Constructor*****************
    *********************************************
    @batch_size: is the batch size for training/validation
    @max_len: is the maximum number of tokens allowed per example sentence
    """
    # 👇 contains the ted_hrlr_translate/pt_to_en tf.data.Dataset
    # train split, loaded as_supervised
    self.data_train = tfds.load('ted_hrlr_translate/pt_to_en',
                                split='train', as_supervised=True)
    # 👇 contains the ted_hrlr_translate/pt_to_en tf.data.Dataset
    # validation split, loaded as_supervised
    self.data_valid = tfds.load('ted_hrlr_translate/pt_to_en',
                                split='validation', as_supervised=True)
    pt, en = self.tokenize_dataset(self.data_train)
    # the Portuguese tokenizer created from the training set
    self.tokenizer_pt = pt
    # the English tokenizer created from the training set
    self.tokenizer_en = en
    self.data_train = self.data_train.map(lambda x, y: tf.py_function(
        self.tf_encode, [x, y], (tf.int64, tf.int64)))
    self.data_valid = self.data_valid.map(lambda x, y: tf.py_function(
        self.tf_encode, [x, y], (tf.int64, tf.int64)))
    self.data_train = self.data_train.filter(
        lambda x, y: len(x) <= max_len and len(y) <= max_len)
    self.data_valid = self.data_valid.filter(
        lambda x, y: len(x) <= max_len and len(y) <= max_len)
    self.data_train = self.data_train.cache().shuffle(10000000)
    self.data_train = self.data_train.padded_batch(batch_size,
                                                   ([None], [None]))
    self.data_train = self.data_train.prefetch(
        tf.data.experimental.AUTOTUNE)
    self.data_valid = self.data_valid.padded_batch(batch_size,
                                                   ([None], [None]))
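A short usage sketch under assumptions (the enclosing class is referred to as Dataset here; the batch_size and max_len values are illustrative): after the constructor above runs, data_train yields padded (pt, en) batches ready for training.

data = Dataset(batch_size=32, max_len=40)  # hypothetical class name and values
for pt_batch, en_batch in data.data_train.take(1):
    # Each tensor has shape (batch_size, longest_sentence_in_batch) after padded_batch.
    print(pt_batch.shape, en_batch.shape)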
def tf_encode(self, pt, en):
    """Acts as a tensorflow wrapper for the encode instance method.

    Args:
        pt: tf.Tensor containing the Portuguese sentence
        en: tf.Tensor containing the corresponding English sentence
    """
    p, e = tf.py_function(self.encode, [pt, en], [tf.int64, tf.int64])
    p.set_shape([None])
    e.set_shape([None])
    return p, e
def initialize(self, initial_chain_state, initial_kernel_results=None):  # pylint: disable=unused-argument
    """Initialize progress bars.

    All arguments are ignored.

    Args:
      initial_chain_state: A (possibly nested) structure of `Tensor`s or
        Python `list`s of `Tensor`s representing the current state(s) of the
        Markov chain(s). It is used to infer the structure of future trace
        results.
      initial_kernel_results: A (possibly nested) structure of `Tensor`s
        representing internal calculations made in a related
        `TransitionKernel`. It is used to infer the structure of future trace
        results.

    Returns:
      state: empty list.
    """
    num_results = tf.convert_to_tensor(self.num_results)

    def init_bar(num_results):
        self.bar = self.progress_bar_fn(int(num_results))

    tf.py_function(init_bar, (num_results,), ())
    return []