def get_tpu_addr(tpu_name=None):
    # Get the TPU's location
    if tpu_name is not None:
        return TPUClusterResolver(tpu_name).get_master()
    if 'COLAB_TPU_ADDR' in os.environ:
        return TPUClusterResolver().get_master()
    elif 'TPU_NAME' in os.environ:
        return TPUClusterResolver(os.environ['TPU_NAME']).get_master()
def get_tpu_addr(tpu_name=None): """Get the TPU's location.""" if tpu_name is not None: return TPUClusterResolver(tpu_name).get_master() if "COLAB_TPU_ADDR" in os.environ: return TPUClusterResolver().get_master() elif "TPU_NAME" in os.environ: return TPUClusterResolver(os.environ["TPU_NAME"]).get_master()
def q1():
    global l_returnflag_group_size
    global l_linestatus_group_size
    returnflag_groups = np.unique(l_returnflag)
    linestatus_groups = np.unique(l_linestatus)
    l_returnflag_group_size = len(returnflag_groups)
    l_linestatus_group_size = len(linestatus_groups)
    inputs = [
        tf.convert_to_tensor(l_shipdate, np.float32),
        tf.convert_to_tensor(l_returnflag, np.float32),
        tf.convert_to_tensor(l_linestatus, np.float32),
        tf.convert_to_tensor(l_quantity, np.float32),
        tf.convert_to_tensor(l_extendedprice, np.float32),
        tf.convert_to_tensor(l_discount, np.float32),
        tf.convert_to_tensor(l_tax, np.float32),
        tf.convert_to_tensor(returnflag_groups, np.float32),
        tf.convert_to_tensor(linestatus_groups, np.float32)
    ]
    tpu_computation = tpu.rewrite(q1_computation, inputs)
    tpu_grpc_url = TPUClusterResolver(
        tpu=[os.environ['TPU_NAME']]).get_master()
    with tf.Session(tpu_grpc_url) as sess:
        sess.run(tpu.initialize_system())
        sess.run(tf.global_variables_initializer())
        for i in range(0, 5):
            res = sess.run(tpu_computation)
        sess.run(tpu.shutdown_system())
        print(res)
    return res
def run(size):
    a_ = []
    b_ = []
    c_ = []
    for i in range(size):
        a_.append((i * 1.0 + 4.0) * 2.5)
        b_.append((i * 1.0 + 5.0) * 2.5)
        c_.append((i * 1.0 + 6.0) * 0.1)
    inputs = [tf.constant(a_), tf.constant(b_), tf.constant(c_)]
    tpu_computation = tpu.rewrite(expression, inputs)
    tpu_grpc_url = TPUClusterResolver(
        tpu=[os.environ['TPU_NAME']]).get_master()
    with tf.Session(tpu_grpc_url) as sess:
        sess.run(tpu.initialize_system())
        t1 = time()
        sess.run(tf.global_variables_initializer())
        sess.run(tpu_computation)
        t2 = time()
        print(str(size) + " : " + str(t2 - t1))
        sess.run(tpu.shutdown_system())
    print('Done !')
def main(args):
    # unpack the tensor batch to be used as the list of inputs of the TPU function
    dataset = train_input_fn()
    iterator = dataset.make_one_shot_iterator()
    features, labels = iterator.get_next()

    # mark part of the graph to be run on the TPUs
    global_step_tensor, loss_tensor = tf.contrib.tpu.rewrite(
        tpu_computation, [features, labels])

    # utility ops
    tpu_init = tf.contrib.tpu.initialize_system()
    tpu_shutdown = tf.contrib.tpu.shutdown_system()
    variables_init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    # get the TPU resource's grpc url
    # Note: when running on CMLE, args.tpu should be left as None
    tpu_grpc_url = TPUClusterResolver(tpu=args.tpu).get_master()

    sess = tf.Session(tpu_grpc_url)
    sess.run(tpu_init)
    sess.run(variables_init)

    for i in range(args.max_steps):
        # the tensor values in the TPU function are returned in a list, and the
        # operations in the TPU function are called with no return value
        global_step, loss = sess.run([global_step_tensor, loss_tensor])
        if i % args.save_checkpoints_steps == 0:
            saver.save(sess,
                       os.path.join(args.model_dir, 'model.ckpt'),
                       global_step=global_step)
            tf.logging.info('global_step: {}, loss: {}'.format(global_step, loss))

    sess.run(tpu_shutdown)
def create_labels(input_tfrecord_path,
                  output_tfrecord_path,
                  dataset_preprocess_fn,
                  embedding_fn,
                  label_fn,
                  write_fn=None,
                  batch_size=64,
                  parallel_calls=1):
    """Creates a new set of labels for a single chunk.

    Args:
      input_tfrecord_path: String with input TF Record file.
      output_tfrecord_path: String with output TF Record file.
      dataset_preprocess_fn: Preprocessing function applied to dataset.
      embedding_fn: Embedding function applied to the dataset tensor.
      label_fn: Label function applied to the embedding output (after sess.run).
      write_fn: Function to write TF Record to TF Record writer.
      batch_size: Optional integer with batch size.
      parallel_calls: Optional integer with the number of parallel map calls.
    """
    tf.logging.info("Input: {}\nOutput: {}".format(input_tfrecord_path,
                                                   output_tfrecord_path))
    if write_fn is None:
        write_fn = write_imagenet
    if FLAGS.tpu_name:
        cluster = TPUClusterResolver(tpu=[FLAGS.tpu_name])
    else:
        cluster = None
    config = tf.contrib.tpu.RunConfig(cluster=cluster)

    # Load the data in the chunk.
    input_dataset = tf.data.TFRecordDataset(input_tfrecord_path)
    input_dataset = input_dataset.map(dataset_preprocess_fn, parallel_calls)
    input_dataset = input_dataset.batch(batch_size)
    next_node = input_dataset.make_one_shot_iterator().get_next()
    embedding = embedding_fn(next_node)

    with tf.Session(cluster.get_master(), config=config.session_config) as sess:
        with tf.python_io.TFRecordWriter(output_tfrecord_path) as writer:
            sess.run(tf.global_variables_initializer())
            while True:
                try:
                    embedded = sess.run(embedding)
                    results = label_fn(embedded)
                    write_fn(writer, results)
                except tf.errors.OutOfRangeError:
                    break
def apply_comp(inputs):
    tpu_computation = tpu.rewrite(apply, inputs)
    tpu_grpc_url = TPUClusterResolver(
        tpu=[os.environ['TPU_NAME']]).get_master()
    with tf.Session(tpu_grpc_url) as sess:
        sess.run(tpu.initialize_system())
        sess.run(tf.global_variables_initializer())
        t1 = time()
        sess.run(tpu_computation)
        t2 = time()
        sess.run(tpu.shutdown_system())
    print(t2 - t1)
def filter_sum():
    inputs = [tf.convert_to_tensor(l_quantity, np.float32)]
    tpu_computation = tpu.rewrite(filter_sum_computation, inputs)
    tpu_grpc_url = TPUClusterResolver(
        tpu=[os.environ['TPU_NAME']]).get_master()
    with tf.Session(tpu_grpc_url) as sess:
        sess.run(tpu.initialize_system())
        sess.run(tf.global_variables_initializer())
        for i in range(0, 5):
            res = sess.run(tpu_computation)
        sess.run(tpu.shutdown_system())
        print(res)
    return res
def main(_):
    tpu_grpc_url = None
    if config.use_tpu:
        tpu_grpc_url = TPUClusterResolver(tpu=config.tpu_name).get_master()

    run_config = tpu.RunConfig(
        master=tpu_grpc_url,
        evaluation_master=tpu_grpc_url,
        model_dir=config.log_dir,
        session_config=tf.ConfigProto(allow_soft_placement=True),
        tpu_config=tpu.TPUConfig(config.tpu_iterations, config.tpu_shards))

    batch_size = config.batch_size * config.tpu_shards if config.use_tpu else config.batch_size

    est = Estimator(
        model_fn=estimator.model_fn,
        use_tpu=config.use_tpu,
        train_batch_size=batch_size,
        eval_batch_size=batch_size,
        params={
            "use_tpu": config.use_tpu,
            "data_dir": config.data_dir,
            "buffer_size": config.buffer_size,
            "data_format": "NCHW" if config.use_tpu else "NHWC",
            "z_dim": config.z_dim,
            "D_lr": config.d_lr,
            "G_lr": config.g_lr,
            "data_seed": config.data_shuffle_seed,
            "data_map_parallelism": config.data_map_parallelism
        },
        config=run_config)

    if config.train:
        est.train(
            input_fn=estimator.train_input_fn,
            max_steps=config.train_steps)
    if config.eval:
        est.evaluate(
            input_fn=estimator.eval_input_fn,
            steps=config.eval_steps)
    elif config.predict:
        est.predict(
            input_fn=lambda params: estimator.predict_input_fn(params, config.predict_class),
            predict_keys=['G2'])
def group_by():
    unique_groups = np.unique(l_returnflag)
    inputs = [
        tf.convert_to_tensor(l_quantity, np.float32),
        tf.convert_to_tensor(l_returnflag, np.float32),
        tf.convert_to_tensor(unique_groups, np.float32)
    ]
    tpu_computation = tpu.rewrite(group_by_computation, inputs)
    tpu_grpc_url = TPUClusterResolver(
        tpu=[os.environ['TPU_NAME']]).get_master()
    with tf.Session(tpu_grpc_url) as sess:
        sess.run(tpu.initialize_system())
        sess.run(tf.global_variables_initializer())
        for i in range(0, 5):
            res = sess.run(tpu_computation)
        sess.run(tpu.shutdown_system())
        print(res)
def setup_model(self):
    print("*** Thawing model from JSON ***")
    with open(self.experiment_env.model_json, "r") as fptr:
        json_string = fptr.read()
    model = model_from_json(json_string)  # type: Model
    model.load_weights(self.experiment_env.final_weights)
    adam = optimizers.Adam(
        lr=self.config["RNN-train"].getfloat("initial_lr"))
    model.compile(loss='binary_crossentropy', optimizer=adam)
    model.summary()
    if self.config["RNN-train"].getboolean("use_tpu"):
        model = tf.contrib.tpu.keras_to_tpu_model(
            model,
            strategy=tf.contrib.tpu.TPUDistributionStrategy(
                tf.contrib.cluster_resolver.TPUClusterResolver(
                    tpu=TPUClusterResolver(
                        tpu=[os.environ['TPU_NAME']]).get_master())))
    return model
def run():
    S0 = 100.
    K0 = 100.
    r0 = 0.05
    T0 = 1.0
    v0 = 0.2
    S = tf.constant(S0)
    K = tf.constant(K0)
    r = tf.constant(r0)
    T = tf.constant(T0)
    v = tf.constant(v0)
    inputs = [S, K, r, T, v]
    tpu_computation = tpu.rewrite(blackscholes, inputs)
    tpu_grpc_url = TPUClusterResolver(
        tpu=[os.environ['TPU_NAME']]).get_master()
    timer(tpu_computation, tpu_grpc_url)
    print('Done !')
def timer(inputs):
    reps = 2
    times = []
    for i in range(reps):
        t1 = time()
        tpu_computation = tpu.rewrite(blackscholes, inputs)
        tpu_grpc_url = TPUClusterResolver(
            tpu=[os.environ['TPU_NAME']]).get_master()
        with tf.Session(tpu_grpc_url) as sess:
            sess.run(tpu.initialize_system())
            sess.run(tf.global_variables_initializer())
            sess.run(tpu_computation)
            sess.run(tpu.shutdown_system())
        t2 = time()
        print(str(i) + "_ : " + str(t2 - t1))
        times.append(t2 - t1)
    print(sum(times) / reps)
def __init__(self, N, batch_size=32, archive_fit_samples=64,
             use_tpu=None, log_path='logs/tensorboard'):
    self.N = N
    self.batch_size = batch_size
    self.model = None
    self.archive_fit_samples = archive_fit_samples
    self.position_archive = []
    self.tpu_grpc_url = use_tpu

    tpu_name_environ_key = 'TPU_NAME'
    # Check whether the server has a TPU
    if use_tpu is not False and tpu_name_environ_key in os.environ:
        tpu_name = os.environ[tpu_name_environ_key].strip()
        if tpu_name != "":
            self.is_tpu = True
            self.tpu_grpc_url = TPUClusterResolver(
                tpu=[os.environ[tpu_name_environ_key]]).get_master()
    # TODO write an if condition to validate and resolve the TPU url provided

    self.__loss_functions = [
        'categorical_crossentropy',
        'binary_crossentropy'
    ]
    self.model_name = time.strftime('GM{0}-%y%m%dT%H%M%S').format('%02d' % N)
    # print(self.model_name)
    log_path = os.path.join(log_path, self.model_name)
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    self.callback = TensorBoard(log_path)
def model_train(self):
    # 1. Build the graph
    train_graph = tf.Graph()
    # Set the graph to default to ensure that it is ready for training
    with train_graph.as_default():
        # Load the model inputs
        (input_data_logdesignid_enc, input_data_logdesignid_dec,
         target_logdesignid, lr, source_sequence_length,
         max_source_sequence_length) = self.get_model_inputs()

        embedding_encoder = tf.get_variable("embedding_encoder", [
            len(self.logdesignid_int_to_vocab), self.embedding_size_logdesignid
        ])
        enc_embed_input_logdesignid = tf.nn.embedding_lookup(
            embedding_encoder, input_data_logdesignid_enc)
        print("[model_train] enc_embed_input_logdesignid:")
        self.print_activations(enc_embed_input_logdesignid)

        embedding_decoder = tf.get_variable("embedding_decoder", [
            len(self.logdesignid_int_to_vocab), self.embedding_size_logdesignid
        ])
        dec_embed_input_logdesignid = tf.nn.embedding_lookup(
            embedding_decoder, input_data_logdesignid_dec)
        print("[model_train] dec_embed_input_logdesignid:")
        self.print_activations(dec_embed_input_logdesignid)

        input_data_enc = enc_embed_input_logdesignid
        input_data_dec = dec_embed_input_logdesignid
        # targets = input_data_enc  # autoencoder: target equals the input
        # targets = self.player_logid_test

        with tf.name_scope('seq2seq'):
            # Create the training and inference logits
            training_decoder_outputs, logits, enc_state = self.seq2seq_model(
                input_data_enc, input_data_dec, target_logdesignid, lr,
                source_sequence_length, max_source_sequence_length,
                source_sequence_length, len(self.logdesignid_int_to_vocab),
                len(self.logdesignid_int_to_vocab), self.rnn_size,
                self.rnn_num_layers)

        masks = tf.sequence_mask(source_sequence_length,
                                 max_source_sequence_length,
                                 dtype=tf.float32,
                                 name='masks')

        with tf.name_scope("optimization"):
            # Loss function
            print('[model_train] training_logits:', logits.get_shape())
            print('[model_train] targets', target_logdesignid.get_shape())
            crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=target_logdesignid, logits=logits)
            cost = (tf.reduce_sum(crossent * masks) /
                    (self.batch_size * self.maxlen))
            tf.summary.scalar('loss', cost)

            # Calculate and clip gradients
            params = tf.trainable_variables()
            gradients = tf.gradients(cost, params)
            clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5)

            # Optimizer
            optimizer = tf.train.AdamOptimizer(lr)
            train_op = optimizer.apply_gradients(
                zip(clipped_gradients, params))

    # 2. Start training
    checkpoint = self.model_file + "best_model.ckpt"
    tpu_grpc_url = TPUClusterResolver(
        tpu_names=[os.environ['TPU_NAME']]).get_master()
    with tf.Session(tpu_grpc_url, graph=train_graph) as sess:
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(self.log_file + 'train',
                                             sess.graph)
        test_writer = tf.summary.FileWriter(self.log_file + 'test')
        sess.run(tf.global_variables_initializer())
        max_batchsize = self.train_size // self.batch_size
        # for epoch_i in range(1, self.epochs + 1):
        epoch_i = 1
        test_generator = self.generator_batches(datatype='test')
        for batch_i, (pad_enc_logdesignid_batch, pad_dec_logdesignid_batch,
                      sources_lengths, train_targets_batch) in enumerate(
                          self.generator_batches(datatype='train')):
            # print('train_targets_batch', train_targets_batch)
            if (batch_i % max_batchsize) + 1 == max_batchsize:
                epoch_i += 1
            if epoch_i >= self.epochs:
                break

            # Training step
            with tf.name_scope('loss'):
                # try:
                # print('train', pad_enc_logdesignid_batch, pad_dec_logdesignid_batch, train_targets_batch)
                summary, _, loss = sess.run(
                    [merged, train_op, cost], {
                        input_data_logdesignid_enc: pad_enc_logdesignid_batch,
                        input_data_logdesignid_dec: pad_dec_logdesignid_batch,
                        target_logdesignid: train_targets_batch,
                        lr: self.learning_rate,
                        source_sequence_length: sources_lengths,
                    })
                # except:
            train_writer.add_summary(summary, batch_i)

            # Debug message updating us on the status of the training
            if batch_i % self.display_step == 0:
                (pad_enc_valid_logdesignid_batch,
                 pad_dec_valid_logdesignid_batch, valid_sources_lengths,
                 valid_targets_batch) = next(test_generator)
                # Calculate validation cost
                # print('test', pad_enc_valid_logdesignid_batch, pad_dec_valid_logdesignid_batch, valid_targets_batch)
                summary, validation_loss = sess.run(
                    [merged, cost], {
                        input_data_logdesignid_enc:
                            pad_enc_valid_logdesignid_batch,
                        input_data_logdesignid_dec:
                            pad_dec_valid_logdesignid_batch,
                        target_logdesignid: valid_targets_batch,
                        lr: self.learning_rate,
                        source_sequence_length: valid_sources_lengths,
                    })
                test_writer.add_summary(summary, batch_i)
                print(
                    'Epoch {:>3}/{} Batch {:>4}/{} - Loss: {:>6.3f} - Validation loss: {:>6.3f}'
                    .format(epoch_i, self.epochs,
                            (batch_i % max_batchsize) + 1, max_batchsize,
                            loss, validation_loss))

            # if epoch_i % self.saver_step == 0 and ((batch_i % max_batchsize)+1) % max_batchsize == 0:
            if ((batch_i % max_batchsize) + 1) % self.saver_step == 0:
                saver = tf.train.Saver()
                saver.save(
                    sess,
                    os.path.join(
                        os.getcwd(),
                        self.model_file + "epoch" + str(epoch_i) + "batch" +
                        str((batch_i % max_batchsize) + 1) + ".ckpt"))

        # Save Model
        # saver = tf.train.Saver()
        # saver.save(sess, checkpoint)
        print('Model Trained and Saved')
    slice3 = tf.slice(fijk, [i + d3, j, 0], [1, 1, size])
    slice4 = tf.slice(fijk, [i - d4, j, 0], [1, 1, size])
    fdo = c1 * slice1 + c2 * slice2 + c3 * slice3 + c4 * slice4
    return fdo


if __name__ == "__main__":
    dim1 = [0., 1., 2., 3., 4.]
    dim2 = [5., 6., 7., 8., 9.]
    dim3 = [10., 11., 12., 13., 14.]
    dim4 = [15., 16., 17., 18., 19.]
    fijk = tf.constant([[dim1, dim2, dim3, dim4],
                        [dim2, dim3, dim4, dim1],
                        [dim3, dim4, dim1, dim2],
                        [dim4, dim1, dim2, dim3]])
    i = tf.constant(1)
    j = tf.constant(1)
    dk = tf.constant(0)
    inputs = [fijk, i, j, dk]
    tpu_computation = tpu.rewrite(apply_, inputs)
    tpu_grpc_url = TPUClusterResolver(
        tpu=[os.environ['TPU_NAME']]).get_master()
    run(tpu_computation, tpu_grpc_url)
    print('Done !')
def required_tpu(self):
    return self._required_tpu


# pylint: disable=g-long-lambda
default_strategy = NamedDistribution(
    "Default",
    distribution_strategy_context._get_default_distribution_strategy,  # pylint: disable=protected-access
    required_gpus=None)
one_device_strategy = NamedDistribution(
    "OneDeviceCPU", lambda: one_device_lib.OneDeviceStrategy("/cpu:0"),
    required_gpus=None)
tpu_strategy = NamedDistribution(
    "TPU",
    lambda: tpu_lib.TPUStrategy(TPUClusterResolver(""), steps_per_run=5),
    required_tpu=True)
# Note that we disable prefetching for testing since prefetching makes
# the input non-deterministic.
mirrored_strategy_with_gpu_and_cpu = NamedDistribution(
    "MirroredCPUAndGPU",
    lambda: mirrored_lib.MirroredStrategy(["/gpu:0", "/cpu:0"],
                                          prefetch_on_device=False),
    required_gpus=1)
mirrored_strategy_with_two_gpus = NamedDistribution(
    "Mirrored2GPUs",
    lambda: mirrored_lib.MirroredStrategy(["/gpu:0", "/gpu:1"],
                                          prefetch_on_device=False),
    required_gpus=2)

adam_optimizer_v1_fn = NamedObject("AdamV1",
def train_and_eval():
    """Trains a network on (self) supervised data."""
    checkpoint_dir = os.path.join(FLAGS.workdir)

    if FLAGS.use_tpu:
        master = TPUClusterResolver(tpu=[os.environ['TPU_NAME']]).get_master()
    else:
        master = ''

    config = tf.contrib.tpu.RunConfig(
        model_dir=checkpoint_dir,
        tf_random_seed=FLAGS.get_flag_value('random_seed', None),
        master=master,
        evaluation_master=master,
        keep_checkpoint_every_n_hours=FLAGS.get_flag_value(
            'keep_checkpoint_every_n_hours', 4),
        save_checkpoints_secs=FLAGS.get_flag_value('save_checkpoints_secs', 600),
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=TPU_ITERATIONS_PER_LOOP,
            tpu_job_name=FLAGS.tpu_worker_name))

    # The global batch-sizes are passed to the TPU estimator, and it will pass
    # along the local batch size in the model_fn's `params` argument dict.
    estimator = tf.contrib.tpu.TPUEstimator(
        model_fn=get_self_supervision_model(FLAGS.task),
        model_dir=checkpoint_dir,
        config=config,
        use_tpu=FLAGS.use_tpu,
        train_batch_size=FLAGS.batch_size,
        eval_batch_size=FLAGS.get_flag_value('eval_batch_size',
                                             FLAGS.batch_size))

    if FLAGS.run_eval:
        data_fn = functools.partial(datasets.get_data,
                                    split_name=FLAGS.get_flag_value(
                                        'val_split', 'val'),
                                    is_training=False,
                                    shuffle=False,
                                    num_epochs=1,
                                    drop_remainder=FLAGS.use_tpu)

        # Contrary to what the documentation claims, the `train` and the
        # `evaluate` functions NEED to have `max_steps` and/or `steps` set and
        # cannot make use of the iterator's end-of-input exception, so we need
        # to do some math for that here.
        num_samples = datasets.get_count(
            FLAGS.get_flag_value('val_split', 'val'))
        num_steps = num_samples // FLAGS.get_flag_value(
            'eval_batch_size', FLAGS.batch_size)
        tf.logging.info('val_steps: %d', num_steps)

        for checkpoint in tf.contrib.training.checkpoints_iterator(
                estimator.model_dir, timeout=10 * 60):
            estimator.evaluate(checkpoint_path=checkpoint,
                               input_fn=data_fn,
                               steps=num_steps)

            hub_exporter = hub.LatestModuleExporter('hub', serving_input_fn)
            hub_exporter.export(estimator,
                                os.path.join(checkpoint_dir, 'export/hub'),
                                checkpoint)

            if tf.gfile.Exists(os.path.join(FLAGS.workdir, 'TRAINING_IS_DONE')):
                break

        # Evaluates the latest checkpoint on validation set.
        result = estimator.evaluate(input_fn=data_fn, steps=num_steps)
        return result
    else:
        train_data_fn = functools.partial(
            datasets.get_data,
            split_name=FLAGS.get_flag_value('train_split', 'train'),
            is_training=True,
            num_epochs=int(math.ceil(FLAGS.epochs)),
            drop_remainder=True)

        # We compute the number of steps and make use of Estimator's max_steps
        # arguments instead of relying on the Dataset's iterator to run out
        # after a number of epochs so that we can use 'fractional' epochs,
        # which are used by regression tests. (And because TPUEstimator needs
        # it anyways.)
        num_samples = datasets.get_count(
            FLAGS.get_flag_value('train_split', 'train'))
        # Depending on whether we drop the last batch each epoch or only at the
        # very end, this should be ordered differently for rounding.
        updates_per_epoch = num_samples // FLAGS.batch_size
        num_steps = int(math.ceil(FLAGS.epochs * updates_per_epoch))
        tf.logging.info('train_steps: %d', num_steps)

        estimator.train(train_data_fn, max_steps=num_steps)
def required_tpu(self):
    return self._required_tpu


# pylint: disable=g-long-lambda
default_strategy = NamedDistribution(
    "Default",
    lambda: distribute_lib._default_distribution_strategy,  # pylint: disable=protected-access
    required_gpus=None)
one_device_strategy = NamedDistribution(
    "OneDeviceCPU", lambda: one_device_lib.OneDeviceStrategy("/cpu:0"),
    required_gpus=None)
tpu_strategy = NamedDistribution(
    "TPU", lambda: tpu_lib.TPUStrategy(TPUClusterResolver("")),
    required_tpu=True)
# Note that we disable prefetching for testing since prefetching makes
# the input non-deterministic.
mirrored_strategy_with_gpu_and_cpu = NamedDistribution(
    "MirroredCPUAndGPU",
    lambda: mirrored_lib.MirroredStrategy(["/gpu:0", "/cpu:0"],
                                          prefetch_on_device=False),
    required_gpus=1)
mirrored_strategy_with_two_gpus = NamedDistribution(
    "Mirrored2GPUs",
    lambda: mirrored_lib.MirroredStrategy(["/gpu:0", "/gpu:1"],
                                          prefetch_on_device=False),
    required_gpus=2)

multi_worker_strategy_with_cpu = NamedDistribution(
def main(args):
    # Unpack the tensor batch to be used to set up the infeed/outfeed queues.
    dataset = train_input_fn()
    iterator = dataset.make_one_shot_iterator()
    features, labels = iterator.get_next()
    infeed_ops, outfeed_ops = setup_feed(features, labels, num_shards=8)

    # Wrap the tpu computation function to be run in a loop.
    def computation_loop():
        return tf.contrib.tpu.repeat(
            args.max_steps,
            partial(tpu_computation_with_infeed, batch_size=16, num_shards=8))

    # Since we are using infeed/outfeed queues, tensors are not explicitly
    # passed in or returned.
    tpu_computation_loop = tf.contrib.tpu.batch_parallel(computation_loop,
                                                         num_shards=8)

    # utility ops
    tpu_init = tf.contrib.tpu.initialize_system()
    tpu_shutdown = tf.contrib.tpu.shutdown_system()
    variables_init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    # get the TPU resource's grpc url
    # Note: when running on AI Platform, args.tpu should be left as None
    tpu_grpc_url = TPUClusterResolver(tpu=args.tpu).get_master()
    sess = tf.Session(tpu_grpc_url)

    # Use separate threads to run infeed and outfeed.
    def _run_infeed():
        for i in range(args.max_steps):
            sess.run(infeed_ops)
            if i % args.save_checkpoints_steps == 0:
                print('infeed {}'.format(i))

    def _run_outfeed():
        for i in range(args.max_steps):
            outfeed_data = sess.run(outfeed_ops)
            if i % args.save_checkpoints_steps == 0:
                print('outfeed {}'.format(i))
                print('data returned from outfeed: {}'.format(outfeed_data))
                saver.save(sess,
                           os.path.join(args.model_dir, 'model.ckpt'),
                           global_step=i)

    infeed_thread = threading.Thread(target=_run_infeed)
    outfeed_thread = threading.Thread(target=_run_outfeed)

    sess.run(tpu_init)
    sess.run(variables_init)

    infeed_thread.start()
    outfeed_thread.start()

    sess.run(tpu_computation_loop)

    infeed_thread.join()
    outfeed_thread.join()

    sess.run(tpu_shutdown)

    saver.save(sess,
               os.path.join(args.model_dir, 'model.ckpt'),
               global_step=args.max_steps)
def train():
    def getTrainBatch():
        labels = []
        arr = np.zeros([BATCH_SIZE, MAX_WORDS_IN_REVIEW, EMBEDDING_SIZE])
        for i in range(BATCH_SIZE):
            if (i % 2 == 0):
                num = randint(0, 12499)
                labels.append([1, 0])
            else:
                num = randint(12500, 24999)
                labels.append([0, 1])
            arr[i] = training_data_embedded[num, :, :]
        return arr, labels

    # Call implementation
    glove_array, glove_dict = load_glove_embeddings()
    training_data_text = load_data()
    training_data_embedded = embedd_data(training_data_text, glove_array,
                                         glove_dict)
    input_data, labels, dropout_keep_prob, optimizer, accuracy, loss = \
        imp.define_graph()

    # tensorboard
    tf.summary.scalar("training_accuracy", accuracy)
    tf.summary.scalar("loss", loss)
    summary_op = tf.summary.merge_all()

    # saver
    all_saver = tf.train.Saver()

    tpu_grpc_url = TPUClusterResolver(
        tpu=[os.environ['TPU_NAME']]).get_master()
    sess = tf.InteractiveSession(tpu_grpc_url)
    sess.run(tpu.initialize_system())
    sess.run(tf.global_variables_initializer())

    logdir = "tensorboard/" + datetime.datetime.now().strftime(
        "%Y%m%d-%H%M%S") + "/"
    writer = tf.summary.FileWriter(logdir, sess.graph)

    for i in range(iterations):
        batch_data, batch_labels = getTrainBatch()
        sess.run(optimizer, {
            input_data: batch_data,
            labels: batch_labels,
            dropout_keep_prob: 0.6
        })
        if (i % 50 == 0):
            loss_value, accuracy_value, summary = sess.run(
                [loss, accuracy, summary_op], {
                    input_data: batch_data,
                    labels: batch_labels
                })
            writer.add_summary(summary, i)
            print("Iteration: ", i)
            print("loss", loss_value)
            print("acc", accuracy_value)
        if (i % SAVE_FREQ == 0 and i != 0):
            if not os.path.exists(checkpoints_dir):
                os.makedirs(checkpoints_dir)
            save_path = all_saver.save(sess,
                                       checkpoints_dir + "/trained_model.ckpt",
                                       global_step=i)
            print("Saved model to %s" % save_path)

    # sess.close()
    sess.run(tpu.shutdown_system())
def __init__(self, policy, ob_space, ac_space, nenvs, nsteps, ent_coef,
             q_coef, gamma, max_grad_norm, lr, rprop_alpha, rprop_epsilon,
             total_timesteps, lrschedule, c, trust_region, alpha, delta):
    if USING_TPUS:
        tpu_grpc_url = TPUClusterResolver(
            tpu=[os.environ['TPU_NAME']]).get_master()
        sess = tf.Session(tpu_grpc_url)
        sess.run(tpu.initialize_system())
    else:
        sess = get_session()

    nact = ac_space.n
    nbatch = nenvs * nsteps

    A = tf.placeholder(tf.int32, [nbatch])  # actions
    D = tf.placeholder(tf.float32, [nbatch])  # dones
    R = tf.placeholder(tf.float32, [nbatch])  # rewards, not returns
    MU = tf.placeholder(tf.float32, [nbatch, nact])  # mu's
    LR = tf.placeholder(tf.float32, [])
    eps = 1e-6

    step_ob_placeholder = tf.placeholder(dtype=ob_space.dtype,
                                         shape=(nenvs, ) + ob_space.shape)
    train_ob_placeholder = tf.placeholder(dtype=ob_space.dtype,
                                          shape=(nenvs * (nsteps + 1), ) +
                                          ob_space.shape)
    with tf.variable_scope('acer_model', reuse=tf.AUTO_REUSE):
        step_model = policy(observ_placeholder=step_ob_placeholder, sess=sess)
        train_model = policy(observ_placeholder=train_ob_placeholder,
                             sess=sess)

    params = find_trainable_variables("acer_model")
    print("Params {}".format(len(params)))
    for var in params:
        print(var)

    # create polyak averaged model
    ema = tf.train.ExponentialMovingAverage(alpha)
    ema_apply_op = ema.apply(params)

    def custom_getter(getter, *args, **kwargs):
        v = ema.average(getter(*args, **kwargs))
        print(v.name)
        return v

    with tf.variable_scope("acer_model", custom_getter=custom_getter,
                           reuse=True):
        polyak_model = policy(observ_placeholder=train_ob_placeholder,
                              sess=sess)

    # Notation: (var) = batch variable, (var)s = sequence variable,
    # (var)_i = variable indexed by action at step i

    # action probability distributions according to train_model, polyak_model
    # and step_model
    # policy.pi is probability distribution parameters; to obtain a
    # distribution that sums to 1 we need to take the softmax
    train_model_p = tf.nn.softmax(train_model.pi)
    polyak_model_p = tf.nn.softmax(polyak_model.pi)
    step_model_p = tf.nn.softmax(step_model.pi)
    v = tf.reduce_sum(train_model_p * train_model.q,
                      axis=-1)  # shape is [nenvs * (nsteps + 1)]

    # strip off last step
    f, f_pol, q = map(lambda var: strip(var, nenvs, nsteps),
                      [train_model_p, polyak_model_p, train_model.q])
    # Get pi and q values for actions taken
    f_i = get_by_index(f, A)
    q_i = get_by_index(q, A)

    # Compute ratios for importance truncation
    rho = f / (MU + eps)
    rho_i = get_by_index(rho, A)

    # Calculate Q_retrace targets
    qret = q_retrace(R, D, q_i, v, rho_i, nenvs, nsteps, gamma)

    # Calculate losses
    # Entropy
    # entropy = tf.reduce_mean(strip(train_model.pd.entropy(), nenvs, nsteps))
    entropy = tf.reduce_mean(cat_entropy_softmax(f))

    # Policy Gradient loss, with truncated importance sampling & bias correction
    v = strip(v, nenvs, nsteps, True)
    check_shape([qret, v, rho_i, f_i], [[nenvs * nsteps]] * 4)
    check_shape([rho, f, q], [[nenvs * nsteps, nact]] * 2)

    # Truncated importance sampling
    adv = qret - v
    logf = tf.log(f_i + eps)
    gain_f = logf * tf.stop_gradient(
        adv * tf.minimum(c, rho_i))  # [nenvs * nsteps]
    loss_f = -tf.reduce_mean(gain_f)

    # Bias correction for the truncation
    adv_bc = (q - tf.reshape(v, [nenvs * nsteps, 1]))  # [nenvs * nsteps, nact]
    logf_bc = tf.log(f + eps)  # / (f_old + eps)
    check_shape([adv_bc, logf_bc], [[nenvs * nsteps, nact]] * 2)
    gain_bc = tf.reduce_sum(
        logf_bc *
        tf.stop_gradient(adv_bc * tf.nn.relu(1.0 - (c / (rho + eps))) * f),
        axis=1)  # IMP: This is a sum, as it is an expectation wrt f
    loss_bc = -tf.reduce_mean(gain_bc)
    loss_policy = loss_f + loss_bc

    # Value/Q function loss, and explained variance
    check_shape([qret, q_i], [[nenvs * nsteps]] * 2)
    ev = q_explained_variance(tf.reshape(q_i, [nenvs, nsteps]),
                              tf.reshape(qret, [nenvs, nsteps]))
    loss_q = tf.reduce_mean(tf.square(tf.stop_gradient(qret) - q_i) * 0.5)

    # Net loss
    check_shape([loss_policy, loss_q, entropy], [[]] * 3)
    loss = loss_policy + q_coef * loss_q - ent_coef * entropy

    if trust_region:
        g = tf.gradients(-(loss_policy - ent_coef * entropy) * nsteps * nenvs,
                         f)  # [nenvs * nsteps, nact]
        # k = tf.gradients(KL(f_pol || f), f)
        # Directly computed gradient of KL divergence wrt f
        k = -f_pol / (f + eps)  # [nenvs * nsteps, nact]
        k_dot_g = tf.reduce_sum(k * g, axis=-1)
        adj = tf.maximum(0.0,
                         (tf.reduce_sum(k * g, axis=-1) - delta) /
                         (tf.reduce_sum(tf.square(k), axis=-1) +
                          eps))  # [nenvs * nsteps]

        # Calculate stats (before doing adjustment) for logging.
        avg_norm_k = avg_norm(k)
        avg_norm_g = avg_norm(g)
        avg_norm_k_dot_g = tf.reduce_mean(tf.abs(k_dot_g))
        avg_norm_adj = tf.reduce_mean(tf.abs(adj))

        g = g - tf.reshape(adj, [nenvs * nsteps, 1]) * k
        # These are trust region adjusted gradients wrt f, i.e. statistics of policy pi
        grads_f = -g / (nenvs * nsteps)
        grads_policy = tf.gradients(f, params, grads_f)
        grads_q = tf.gradients(loss_q * q_coef, params)
        grads = [
            gradient_add(g1, g2, param)
            for (g1, g2, param) in zip(grads_policy, grads_q, params)
        ]

        avg_norm_grads_f = avg_norm(grads_f) * (nsteps * nenvs)
        norm_grads_q = tf.global_norm(grads_q)
        norm_grads_policy = tf.global_norm(grads_policy)
    else:
        grads = tf.gradients(loss, params)

    if max_grad_norm is not None:
        grads, norm_grads = tf.clip_by_global_norm(grads, max_grad_norm)
    grads = list(zip(grads, params))
    trainer = tf.train.RMSPropOptimizer(learning_rate=LR,
                                        decay=rprop_alpha,
                                        epsilon=rprop_epsilon)
    _opt_op = trainer.apply_gradients(grads)

    # so when you call _train, you first do the gradient step, then you apply ema
    with tf.control_dependencies([_opt_op]):
        _train = tf.group(ema_apply_op)

    lr = Scheduler(v=lr, nvalues=total_timesteps, schedule=lrschedule)

    # Ops/Summaries to run, and their names for logging
    run_ops = [
        _train, loss, loss_q, entropy, loss_policy, loss_f, loss_bc, ev,
        norm_grads
    ]
    names_ops = [
        'loss', 'loss_q', 'entropy', 'loss_policy', 'loss_f', 'loss_bc',
        'explained_variance', 'norm_grads'
    ]
    if trust_region:
        run_ops = run_ops + [
            norm_grads_q, norm_grads_policy, avg_norm_grads_f, avg_norm_k,
            avg_norm_g, avg_norm_k_dot_g, avg_norm_adj
        ]
        names_ops = names_ops + [
            'norm_grads_q', 'norm_grads_policy', 'avg_norm_grads_f',
            'avg_norm_k', 'avg_norm_g', 'avg_norm_k_dot_g', 'avg_norm_adj'
        ]

    def train(obs, actions, rewards, dones, mus, states, masks, steps):
        cur_lr = lr.value_steps(steps)
        td_map = {
            train_model.X: obs,
            polyak_model.X: obs,
            A: actions,
            R: rewards,
            D: dones,
            MU: mus,
            LR: cur_lr
        }
        if states is not None:
            td_map[train_model.S] = states
            td_map[train_model.M] = masks
            td_map[polyak_model.S] = states
            td_map[polyak_model.M] = masks

        if USING_TPUS:
            return names_ops, sess.run(
                tpu.rewrite(run_ops, td_map))[1:]  # strip off _train
        else:
            return names_ops, sess.run(run_ops, td_map)[1:]  # strip off _train

    def _step(observation, **kwargs):
        return step_model._evaluate(
            [step_model.action, step_model_p, step_model.state], observation,
            **kwargs)

    self.train = train
    self.save = functools.partial(save_variables, sess=sess, variables=params)
    self.train_model = train_model
    self.step_model = step_model
    self._step = _step
    self.step = self.step_model.step
    self.initial_state = step_model.initial_state
    tf.global_variables_initializer().run(session=sess)
"""Simple scritpt from google TPU collab to measure teraflops [https://colab.research.google.com/notebooks/tpu.ipynb] """ from tensorflow.contrib import tpu from tensorflow.contrib.cluster_resolver import TPUClusterResolver #pylint: disable=E0611 import time import tensorflow as tf #tpu_address = ['node-1', 'node-2'] # Apparently multiple TPUs for a single session are not # yet suported tpu_address = ['node-1'] tpu_cluster = TPUClusterResolver(tpu=tpu_address).get_master() N = 4096 COUNT = 100 def flops(): x = tf.random_uniform([N, N]) y = tf.random_uniform([N, N]) def _matmul(x, y): return tf.tensordot(x, y, axes=[[1], [0]]), y return tf.reduce_sum(tpu.repeat(COUNT, _matmul, [x, y])) tpu_ops = tpu.batch_parallel(flops, [], num_shards=8)
from bert import modeling
from bert.run_pretraining import input_fn_builder, model_fn_builder

# configure logging
log = logging.getLogger('tensorflow')
log.setLevel(logging.INFO)

# create formatter and add it to the handlers
formatter = logging.Formatter('%(asctime)s : %(message)s')
sh = logging.StreamHandler()
sh.setLevel(logging.INFO)
sh.setFormatter(formatter)
log.handlers = [sh]

log.info("Using TPU runtime")
USE_TPU = True
tpu_cluster_resolver = TPUClusterResolver(tpu='greek-bert',
                                          zone='us-central1-a')

# SETUP FOLDERS
with tf.Session(tpu_cluster_resolver.get_master()) as session:
    print(tpu_cluster_resolver.get_master())

HOME_PATH = "gs://greek_bert"  # @param {type:"string"}
MODEL_DIR = "greek_bert"  # @param {type:"string"}
PRETRAINING_DIR = "greek_tfrecords"  # @param {type:"string"}
VOC_FNAME = "vocab.txt"  # @param {type:"string"}

# Input data pipeline config
TRAIN_BATCH_SIZE = 256  # @param {type:"integer"}
MAX_PREDICTIONS = 75  # @param {type:"integer"}
MAX_SEQ_LENGTH = 512  # @param {type:"integer"}
MASKED_LM_PROB = 0.15  # @param
def axy_computation(a, x, y):
    return a * x + y


output_shape = [80, 80]

inputs = [
    3.0,
    tf.random_uniform(output_shape, dtype=tf.float32),
    tf.random_uniform(output_shape, dtype=tf.float32),
]

if use_tpu:
    print('Setting up TPU')
    tpu_grpc_url = TPUClusterResolver(tpu=[tpu_name]).get_master()
    computation = tpu.rewrite(axy_computation, inputs)
else:
    print(
        'TPU IS NOT ENABLED (pass a TPU name or grpc://ip:port as the TPU_NAME envvar)'
    )
    computation = tf.py_func(axy_computation, inputs, tf.float32)
    tpu_grpc_url = None

with tf.Session(tpu_grpc_url) as sess:
    if use_tpu:
        print('Running TPU initializer')
        sess.run(tpu.initialize_system())
    sess.run(tf.global_variables_initializer())
    print('Running computation {}'.format(computation))
    output = sess.run(computation)
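    # The original example ends at the sess.run above. A hedged completion,
    # mirroring the print/shutdown pattern of the other snippets in this
    # collection (not taken from the original source):
    print(output)
    if use_tpu:
        sess.run(tpu.shutdown_system())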
# export TPU_NAME=aeon
# wget --show-progress --continue -O shakespeare.txt http://www.gutenberg.org/files/100/100-0.txt

import numpy as np
import six
import tensorflow as tf
import time
import os

from tensorflow.contrib import tpu
from tensorflow.contrib.cluster_resolver import TPUClusterResolver

# This address identifies the TPU we'll use when configuring TensorFlow.
TPU_WORKER = TPUClusterResolver(tpu=[os.environ['TPU_NAME']]).get_master()

SHAKESPEARE_TXT = 'shakespeare1.txt'

tf.logging.set_verbosity(tf.logging.INFO)


def transform(txt, pad_to=None):
    # drop any non-ascii characters
    output = np.asarray([ord(c) for c in txt if ord(c) < 255], dtype=np.int32)
    if pad_to is not None:
        output = output[:pad_to]
        output = np.concatenate([
            np.zeros([pad_to - len(txt)], dtype=np.int32),
            output,
        ])
    return output
@property
def required_tpu(self):
    return self._required_tpu


# pylint: disable=g-long-lambda
default_strategy = NamedDistribution(
    "Default",
    distribution_strategy_context._get_default_distribution_strategy,  # pylint: disable=protected-access
    required_gpus=None)
one_device_strategy = NamedDistribution(
    "OneDeviceCPU", lambda: one_device_lib.OneDeviceStrategy("/cpu:0"),
    required_gpus=None)
tpu_strategy = NamedDistribution(
    "TPU",
    lambda: tpu_lib.TPUStrategy(TPUClusterResolver(""), steps_per_run=2),
    required_tpu=True)
tpu_strategy_one_step = NamedDistribution(
    "TPUOneStep",
    lambda: tpu_lib.TPUStrategy(TPUClusterResolver(""), steps_per_run=1),
    required_tpu=True)
mirrored_strategy_with_one_cpu = NamedDistribution(
    "Mirrored1CPU",
    lambda: mirrored_lib.MirroredStrategy(["/cpu:0"]))
mirrored_strategy_with_one_gpu = NamedDistribution(
    "Mirrored1GPU",
    lambda: mirrored_lib.MirroredStrategy(["/gpu:0"]),
    required_gpus=1)
mirrored_strategy_with_gpu_and_cpu = NamedDistribution(
    "MirroredCPUAndGPU",
    lambda: mirrored_lib.MirroredStrategy(["/gpu:0", "/cpu:0"]),
def train_and_eval():
    """Trains a network on (self) supervised data."""
    checkpoint_dir = FLAGS.get_flag_value("checkpoint", FLAGS.workdir)
    tf.gfile.MakeDirs(checkpoint_dir)

    if FLAGS.tpu_name:
        cluster = TPUClusterResolver(tpu=[FLAGS.tpu_name])
    else:
        cluster = None

    # tf.logging.info("master: %s", master)
    config = RunConfig(
        model_dir=checkpoint_dir,
        tf_random_seed=FLAGS.random_seed,
        cluster=cluster,
        keep_checkpoint_max=None,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=TPUConfig(iterations_per_loop=TPU_ITERATIONS_PER_LOOP))

    # Optionally resume from a stored checkpoint.
    if FLAGS.path_to_initial_ckpt:
        warm_start_from = tf.estimator.WarmStartSettings(
            ckpt_to_initialize_from=FLAGS.path_to_initial_ckpt,
            # The square bracket is important for loading all the
            # variables from GLOBAL_VARIABLES collection.
            # See https://www.tensorflow.org/api_docs/python/tf/estimator/WarmStartSettings  # pylint: disable=line-too-long
            # section vars_to_warm_start for more details.
            vars_to_warm_start=[FLAGS.vars_to_restore])
    else:
        warm_start_from = None

    # The global batch-sizes are passed to the TPU estimator, and it will pass
    # along the local batch size in the model_fn's `params` argument dict.
    estimator = TPUEstimator(
        model_fn=semi_supervised.get_model(FLAGS.task),
        model_dir=checkpoint_dir,
        config=config,
        use_tpu=FLAGS.tpu_name is not None,
        train_batch_size=FLAGS.batch_size,
        eval_batch_size=FLAGS.get_flag_value("eval_batch_size",
                                             FLAGS.batch_size),
        warm_start_from=warm_start_from)

    if FLAGS.run_eval:
        data_fn = functools.partial(
            datasets.get_data,
            split_name=FLAGS.val_split,
            preprocessing=FLAGS.get_flag_value("preprocessing_eval",
                                               FLAGS.preprocessing),
            is_training=False,
            shuffle=False,
            num_epochs=1,
            drop_remainder=True)

        # Contrary to what the documentation claims, the `train` and the
        # `evaluate` functions NEED to have `max_steps` and/or `steps` set and
        # cannot make use of the iterator's end-of-input exception, so we need
        # to do some math for that here.
        num_samples = datasets.get_count(FLAGS.val_split)
        num_steps = num_samples // FLAGS.get_flag_value("eval_batch_size",
                                                        FLAGS.batch_size)
        tf.logging.info("val_steps: %d", num_steps)

        for checkpoint in checkpoints_iterator(
                estimator.model_dir, timeout=FLAGS.eval_timeout_mins * 60):
            result_dict_val = estimator.evaluate(
                checkpoint_path=checkpoint, input_fn=data_fn, steps=num_steps)

            hub_exporter = hub.LatestModuleExporter("hub", serving_input_fn)
            hub_exporter.export(
                estimator,
                os.path.join(checkpoint_dir, "export/hub"),
                checkpoint)

            # This is here instead of using the above `checkpoints_iterator`'s
            # `timeout_fn` param, because that would wait forever on failed
            # trainers which will never create this file.
            if tf.gfile.Exists(os.path.join(FLAGS.workdir, "TRAINING_IS_DONE")):
                break

        # Evaluates the latest checkpoint on validation set.
        result_dict_val = estimator.evaluate(input_fn=data_fn, steps=num_steps)
        tf.logging.info(result_dict_val)

        # Optionally evaluates the latest checkpoint on test set.
        if FLAGS.test_split:
            data_fn = functools.partial(
                datasets.get_data,
                split_name=FLAGS.test_split,
                preprocessing=FLAGS.get_flag_value("preprocessing_eval",
                                                   FLAGS.preprocessing),
                is_training=False,
                shuffle=False,
                num_epochs=1,
                drop_remainder=True)
            num_samples = datasets.get_count(FLAGS.test_split)
            num_steps = num_samples // FLAGS.get_flag_value("eval_batch_size",
                                                            FLAGS.batch_size)
            result_dict_test = estimator.evaluate(input_fn=data_fn,
                                                  steps=num_steps)
            tf.logging.info(result_dict_test)

        return result_dict_val
    else:
        train_data_fn = functools.partial(
            datasets.get_data,
            split_name=FLAGS.train_split,
            preprocessing=FLAGS.preprocessing,
            is_training=True,
            num_epochs=None,  # read data indefinitely for training
            drop_remainder=True)

        # We compute the number of steps and make use of Estimator's max_steps
        # arguments instead of relying on the Dataset's iterator to run out
        # after a number of epochs so that we can use "fractional" epochs,
        # which are used by regression tests. (And because TPUEstimator needs
        # it anyways.)
        num_samples = datasets.get_count(FLAGS.train_split)
        if FLAGS.num_supervised_examples:
            num_samples = FLAGS.num_supervised_examples
        # Depending on whether we drop the last batch each epoch or only at the
        # very end, this should be ordered differently for rounding.
        updates_per_epoch = num_samples // FLAGS.batch_size
        epochs = utils.str2intlist(FLAGS.schedule, strict_int=False)[-1]
        num_steps = int(math.ceil(epochs * updates_per_epoch))
        tf.logging.info("train_steps: %d", num_steps)

        return estimator.train(train_data_fn, max_steps=num_steps)
def experiment(model_config):
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info("SCRIPT START")

    tf.logging.info("TPU resolver started")
    tpu_cluster_resolver = TPUClusterResolver(
        tpu=os.environ['TPU_NAME'],
        project=os.environ['PROJECT_NAME'],
        zone=os.environ['PROJECT_ZONE'])

    config = tpu_config.RunConfig(
        cluster=tpu_cluster_resolver,
        model_dir=model_config['model_base_dir'] + os.path.sep +
        str(model_config["experiment_id"]),
        save_checkpoints_steps=500,
        save_summary_steps=250,
        tpu_config=tpu_config.TPUConfig(
            iterations_per_loop=500,
            num_shards=8,
            per_host_input_for_training=tpu_config.InputPipelineConfig.PER_HOST_V1))  # pylint: disable=line-too-long

    tf.logging.info("Creating datasets")
    urmp_train, urmp_eval, urmp_test = [
        urmp_input.URMPInput(mode=mode,
                             data_dir=model_config['data_path'],
                             transpose_input=False,
                             use_bfloat16=model_config['use_bfloat16'])
        for mode in ['train', 'eval', 'test']
    ]

    tf.logging.info("Assigning TPUEstimator")
    # Optimize in a +supervised fashion until validation loss worsens
    separator = tpu_estimator.TPUEstimator(
        use_tpu=model_config["use_tpu"],
        model_fn=unet_separator,
        config=config,
        train_batch_size=model_config['batch_size'],
        eval_batch_size=model_config['batch_size'],
        predict_batch_size=model_config['batch_size'],
        params={
            i: model_config[i]
            for i in model_config if (i != 'batch_size' and i != 'context')
        })

    if model_config['load_model']:
        tf.logging.info("Load the model")
        current_step = estimator._load_global_step_from_checkpoint_dir(
            model_config['model_base_dir'] + os.path.sep +
            str(model_config["experiment_id"]))

    if model_config['mode'] == 'train_and_eval':
        tf.logging.info("Train the model")
        # Should be an early stopping here, but it will come with tf 1.10
        separator.train(input_fn=urmp_train.input_fn,
                        steps=model_config['training_steps'])
        tf.logging.info("Supervised training finished!")

        tf.logging.info("Evaluate model")
        # Evaluate the model.
        eval_result = separator.evaluate(
            input_fn=urmp_eval.input_fn,
            steps=model_config['evaluation_steps'])
        tf.logging.info('Evaluation results: %s' % eval_result)

    elif model_config['mode'] == 'predict':
        tf.logging.info("Test results and save predicted sources:")
        predictions = separator.predict(input_fn=urmp_test.input_fn)
        for prediction in predictions:
            Test.save_prediction(prediction,
                                 estimates_path=model_config["estimates_path"],
                                 sample_rate=model_config["expected_sr"])
        Utils.concat_and_upload(
            model_config["estimates_path"],
            model_config['model_base_dir'] + os.path.sep +
            str(model_config["experiment_id"]))
import tensorflow as tf
from tensorflow.contrib import tpu
from tensorflow.contrib.cluster_resolver import TPUClusterResolver


def axy_computation(a, x, y):
    return a * x + y


inputs = [
    3.0,
    tf.ones([3, 3], tf.float32),
    tf.ones([3, 3], tf.float32),
]

tpu_computation = tpu.rewrite(axy_computation, inputs)

tpu_grpc_url = TPUClusterResolver().get_master()

with tf.Session(tpu_grpc_url) as sess:
    sess.run(tpu.initialize_system())
    sess.run(tf.global_variables_initializer())
    output = sess.run(tpu_computation)
    with open("/output.txt", "w") as output_file:
        output_file.write(str(output))
    print(output)
    sess.run(tpu.shutdown_system())

print('Done!')