def test_TrainingLogger_create_file(): """Test that EpochEndCallback() produces a file.""" with tempfile.TemporaryDirectory() as d: logger = telemetry.TrainingLogger(pathlib.Path(d)) logger.KerasEpochBeginCallback(0, {}) logger.KerasEpochEndCallback(0, {"loss": 1}) # Note that one is added to the epoch number. assert (pathlib.Path(d) / "epoch_001_telemetry.pbtxt").is_file()
def test_TrainingLogger_EpochTelemetry(): """Test that EpochTelemetry() returns protos.""" with tempfile.TemporaryDirectory() as d: logger = telemetry.TrainingLogger(pathlib.Path(d)) assert not logger.EpochTelemetry() logger.KerasEpochBeginCallback(0, {}) logger.KerasEpochEndCallback(0, {"loss": 1}) assert len(logger.EpochTelemetry()) == 1 logger.KerasEpochBeginCallback(1, {}) logger.KerasEpochEndCallback(1, {"loss": 1}) assert len(logger.EpochTelemetry()) == 2
def main(argv): """Main entry point.""" if len(argv) > 1: raise app.UsageError("Unknown arguments: '{}'.".format(" ".join( argv[1:]))) if not FLAGS.dataset_root: raise app.UsageError("--dataset_root must be a directory") dataset_root = pathlib.Path(FLAGS.dataset_root) assert dataset_root.is_dir() assert (dataset_root / "training").is_dir() assert (dataset_root / "validation").is_dir() assert (dataset_root / "testing").is_dir() if not FLAGS.model_path: raise app.UsageError("--model_path must be set") model_path = pathlib.Path(FLAGS.model_path) model_path.mkdir(parents=True, exist_ok=True) training_protos = LoadPositiveNegativeProtos(dataset_root / "training") validation_protos = LoadPositiveNegativeProtos(dataset_root / "validation") testing_protos = LoadPositiveNegativeProtos(dataset_root / "testing") app.Log( 1, "Number of training examples: %s.", humanize.Commas( len(training_protos.positive) + len(training_protos.negative)), ) app.Log( 1, "Number of validation examples: %s.", humanize.Commas( len(validation_protos.positive) + len(validation_protos.negative)), ) app.Log( 1, "Number of testing examples: %s.", humanize.Commas( len(testing_protos.positive) + len(testing_protos.negative)), ) sequence_length = FLAGS.sequence_length text = "\n\n".join([ p.src for p in training_protos.positive + training_protos.negative + validation_protos.positive + validation_protos.negative + testing_protos.positive + testing_protos.negative ]) app.Log(1, "Deriving atomizer from %s chars.", humanize.Commas(len(text))) atomizer = atomizers.AsciiCharacterAtomizer.FromText(text) app.Log(1, "Vocabulary size: %s.", humanize.Commas(len(atomizer.vocab))) app.Log(1, "Pickled atomizer to %s.", model_path / "atomizer.pkl") with open(model_path / "atomizer.pkl", "wb") as f: pickle.dump(atomizer, f) app.Log(1, "Encoding training corpus") x, y = ProtosToModelData(training_protos, sequence_length, atomizer) validation_data = None if validation_protos.positive: app.Log(1, "Encoding validation corpus") validation_data = ProtosToModelData(validation_protos, sequence_length, atomizer) app.Log(1, "Encoding test corpus") test_x, test_y = ProtosToModelData(testing_protos, sequence_length, atomizer) np.random.seed(FLAGS.seed) app.Log(1, "Building Keras model") model = BuildKerasModel( sequence_length=sequence_length, lstm_size=FLAGS.lstm_size, num_layers=FLAGS.num_layers, dnn_size=FLAGS.dnn_size, atomizer=atomizer, ) app.Log(1, "Training model") def OnEpochEnd(epoch, logs): """End-of-epoch model evaluate.""" del logs app.Log(1, "Evaluating model at epoch %d", epoch) score, accuracy = model.evaluate(test_x, Encode1HotLabels(test_y), batch_size=FLAGS.batch_size, verbose=0) app.Log(1, "Score: %.2f, Accuracy: %.2f", score * 100, accuracy * 100) logger = telemetry.TrainingLogger(pathlib.Path(FLAGS.model_path)) model.fit( x, Encode1HotLabels(y), epochs=FLAGS.num_epochs, batch_size=FLAGS.batch_size, verbose=True, shuffle=True, validation_data=validation_data, callbacks=[ keras.callbacks.ModelCheckpoint( FLAGS.model_path + "/weights_{epoch:03d}.hdf5", verbose=1, mode="min", save_best_only=False, ), keras.callbacks.LambdaCallback(on_epoch_end=OnEpochEnd), logger.KerasCallback(keras), ], )
def TrainingTelemetry( self) -> typing.List[telemetry_pb2.ModelEpochTelemetry]: """Get the training telemetry data.""" return telemetry.TrainingLogger(self.cache.path / 'logs').EpochTelemetry()
def Train(self, corpus) -> None: """Locked training. If there are cached epoch checkpoints, the one closest to the target number of epochs will be loaded, and the model will be trained for only the remaining number of epochs, if any. This means that calling this function twice will only actually train the model the first time, and all subsequent calls will be no-ops. This method must only be called when the model is locked. """ if self.is_trained: return data_generator = data_generators.TensorflowBatchGenerator( corpus, self.config.training) tf = self.InitTfGraph(inference=False) logger = telemetry.TrainingLogger(self.cache.path / 'logs') # training options # TODO(cec): Enable support for multiple optimizers: initial_learning_rate = ( self.config.training.adam_optimizer.initial_learning_rate_micros / 1e6) decay_rate = (self.config.training.adam_optimizer. learning_rate_decay_per_epoch_micros / 1e6) # # resume from prior checkpoint ckpt_path, ckpt_paths = None, None if (self.cache.path / 'checkpoints' / 'checkpoint').exists(): checkpoint_state = tf.train.get_checkpoint_state(self.cache.path / 'checkpoints') assert checkpoint_state assert checkpoint_state.model_checkpoint_path ckpt_path, ckpt_paths = self.GetParamsPath(checkpoint_state) with tf.Session() as sess: tf.global_variables_initializer().run() # Keep all checkpoints. saver = tf.train.Saver(tf.global_variables(), max_to_keep=100, save_relative_paths=True) # restore model from closest checkpoint. if ckpt_path: logging.info("Restoring checkpoint {}".format(ckpt_path)) saver.restore(sess, ckpt_path) # make sure we don't lose track of other checkpoints if ckpt_paths: saver.recover_last_checkpoints(ckpt_paths) # Per-epoch training loop. for epoch_num in range( sess.run(self.epoch) + 1, self.config.training.num_epochs + 1): logger.EpochBeginCallback() # decay and set learning rate new_learning_rate = initial_learning_rate * ( (float(100 - decay_rate) / 100.0)**(epoch_num - 1)) sess.run(tf.assign(self.learning_rate, new_learning_rate)) sess.run(tf.assign(self.epoch, epoch_num)) # TODO(cec): refactor data generator to a Python generator. data_generator.CreateBatches() logging.info('Epoch %d/%d:', epoch_num, self.config.training.num_epochs) state = sess.run(self.initial_state) # Per-batch inner loop. bar = progressbar.ProgressBar( max_value=data_generator.num_batches) for _ in bar(range(data_generator.num_batches)): x, y = data_generator.NextBatch() feed = {self.input_data: x, self.targets: y} for i, (c, h) in enumerate(self.initial_state): feed[c] = state[i].c feed[h] = state[i].h loss, state, _ = sess.run( [self.loss, self.final_state, self.train_op], feed) # Log the loss and delta. logging.info('Loss: %.6f.', loss) # Save after every epoch. start_time = time.time() global_step = epoch_num checkpoint_prefix = (self.cache.path / 'checkpoints' / 'checkpoint') saver.save(sess, checkpoint_prefix, global_step=global_step) checkpoint_path = f'{checkpoint_prefix}-{global_step}' logging.info( 'Saved checkpoint %s in %s ms.', checkpoint_path, humanize.intcomma(int((time.time() - start_time) * 1000))) assert pathlib.Path( f'{checkpoint_prefix}-{global_step}.index').is_file() assert pathlib.Path( f'{checkpoint_prefix}-{global_step}.meta').is_file() logger.EpochEndCallback(epoch_num, loss)
def Train(self, corpus, **unused_kwargs) -> 'keras.models.Sequential': """Locked training. If there are cached epoch checkpoints, the one closest to the target number of epochs will be loaded, and the model will be trained for only the remaining number of epochs, if any. This means that calling this function twice will only actually train the model the first time, and all subsequent calls will be no-ops. This method must only be called when the model is locked. Returns: The trained Keras model. """ del unused_kwargs model = builders.BuildKerasModel(self.config, self.atomizer.vocab_size) with open(self.cache.keypath('model.yaml'), 'w') as f: f.write(model.to_yaml()) model.compile( loss='categorical_crossentropy', optimizer=builders.BuildOptimizer(self.config)) # Print a model summary. buf = io.StringIO() model.summary(print_fn=lambda x: buf.write(x + '\n')) app.Log(1, 'Model summary:\n%s', buf.getvalue()) # TODO(cec): Add an atomizer.CreateVocabularyFile() method, with frequency # counts for a given corpus. def Escape(token: str) -> str: """Make a token visible and printable.""" if token == '\t': return '\\t' elif token == '\n': return '\\n' elif not token.strip(): return f"'{token}'" else: return token if not (self.cache.path / 'embeddings' / 'metadata.tsv').is_file(): with open(self.cache.path / 'embeddings' / 'metadata.tsv', 'w') as f: for _, token in sorted( self.atomizer.decoder.items(), key=lambda x: x[0]): f.write(Escape(token) + '\n') target_num_epochs = self.config.training.num_epochs starting_epoch = 0 epoch_checkpoints = self.epoch_checkpoints if len(epoch_checkpoints) >= target_num_epochs: # We have already trained a model to at least this number of epochs, so # simply the weights from that epoch and call it a day. app.Log(1, 'Loading weights from %s', epoch_checkpoints[target_num_epochs - 1]) model.load_weights(epoch_checkpoints[target_num_epochs - 1]) return model # Now entering the point at which training is inevitable. with logutil.TeeLogsToFile('train', self.cache.path / 'logs'): # Deferred importing of Keras so that we don't have to activate the # TensorFlow backend every time we import this module. import keras if epoch_checkpoints: # We have already trained a model at least part of the way to our target # number of epochs, so load the most recent one. starting_epoch = len(epoch_checkpoints) app.Log(1, 'Resuming training from epoch %d.', starting_epoch) model.load_weights(epoch_checkpoints[-1]) callbacks = [ keras.callbacks.ModelCheckpoint( str(self.cache.path / 'checkpoints' / '{epoch:03d}.hdf5'), verbose=1, mode="min", save_best_only=False), keras.callbacks.TensorBoard( str(self.cache.path / 'embeddings'), write_graph=True, embeddings_freq=1, embeddings_metadata={ 'embedding_1': str(self.cache.path / 'embeddings' / 'metadata.tsv'), }), telemetry.TrainingLogger( self.cache.path / 'logs').KerasCallback(keras), ] generator = data_generators.AutoGenerator(corpus, self.config.training) steps_per_epoch = (corpus.encoded.token_count - 1) // ( self.config.training.batch_size * self.config.training.sequence_length) app.Log( 1, 'Step counts: %s per epoch, %s left to do, %s total', humanize.Commas(steps_per_epoch), humanize.Commas( (target_num_epochs - starting_epoch) * steps_per_epoch), humanize.Commas(target_num_epochs * steps_per_epoch)) model.fit_generator( generator, steps_per_epoch=steps_per_epoch, callbacks=callbacks, initial_epoch=starting_epoch, epochs=target_num_epochs) return model
def main(argv): """Main entry point.""" if len(argv) > 1: raise app.UsageError("Unknown arguments: '{}'.".format(' '.join( argv[1:]))) model_dir = pathlib.Path(FLAGS.reachability_model_dir) if not model_dir.is_dir(): raise app.UsageError('--reachability_model_dir is not a directory.') logger = telemetry.TrainingLogger(logdir=model_dir / 'logs') telemetry_ = logger.EpochTelemetry() num_epochs = len(telemetry_) training_time_ms = sum(t.epoch_wall_time_ms for t in telemetry_) training_time_natural = humanize.naturaldelta(training_time_ms / 1000) time_per_epoch_natural = humanize.naturaldelta(training_time_ms / num_epochs / 1000) losses = [round(t.loss, 2) for t in telemetry_] with open(model_dir / 'model.json') as f: model: keras.models.Model = keras.models.model_from_json(f.read()) model.compile(loss='binary_crossentropy', metrics=['accuracy'], optimizer='adam') model.summary() print(f'Total training time: {training_time_natural} ' f'({time_per_epoch_natural} per epoch).') print(f'Number of epochs: {num_epochs}.') print(f'Training losses: {losses}.') training_data = pbutil.FromFile(model_dir / 'training_data.pbtxt', reachability_pb2.ReachabilityDataset()) testing_data = pbutil.FromFile(model_dir / 'testing_data.pbtxt', reachability_pb2.ReachabilityDataset()) data = reachability_pb2.ReachabilityDataset() data.entry.extend(training_data.entry) data.entry.extend(testing_data.entry) num_nodes = len(training_data.entry[0].graph.node) num_nodes_natural = humanize.intcomma(num_nodes) num_training_graphs_natural = humanize.intcomma(len(training_data.entry)) num_testing_graphs_natural = humanize.intcomma(len(testing_data.entry)) print(f'Training data: {num_training_graphs_natural} graphs of ' f'{num_nodes_natural} nodes each.') print(f'Testing data: {num_testing_graphs_natural} graphs of ' f'{num_nodes_natural} nodes each.') num_connections_training = sum( sum(len(n.child) for n in entry.graph.node) for entry in training_data.entry) num_connections_testing = sum( sum(len(n.child) for n in entry.graph.node) for entry in testing_data.entry) print( 'Average graph connections: {:.1f} training ({:.1f} per node), ' '{:.1f} testing ({:.1f} per node).'.format( num_connections_training / len(training_data.entry), num_connections_training / (len(training_data.entry) * num_nodes), num_connections_testing / len(testing_data.entry), num_connections_testing / (len(testing_data.entry) * num_nodes))) sequence_length = train_model.GetSequenceLength( len(training_data.entry[0].graph.node)) print('Sequence length:', sequence_length) with open(model_dir / 'atomizer.pkl', 'rb') as f: atomizer: atomizers.AtomizerBase = pickle.load(f) print('Vocabulary size:', atomizer.vocab_size) seqs = [ train_model.ControlFlowGraphToSequence(entry.graph) for entry in data.entry ] num_uniq_seqs = len(set(seqs)) print('Unique sequences: {} of {} ({:.2f}%)'.format( humanize.intcomma(num_uniq_seqs), humanize.intcomma(len(seqs)), (num_uniq_seqs / len(seqs)) * 100)) num_uniq_labels = len( set([''.join(str(x) for x in e.reachable) for e in data.entry])) print('Unique labels: {} of {} ({:.2f}%)'.format( humanize.intcomma(num_uniq_labels), humanize.intcomma(len(seqs)), (num_uniq_labels / len(seqs)) * 100)) test_x, test_y = train_model.ProtosToModelData(testing_data, sequence_length, atomizer) zero_r_acc = sum(sum(x) for x in test_y) / len(testing_data.entry) / num_nodes zero_r_acc = max(zero_r_acc[0], 1 - zero_r_acc[0]) print('Zero-R accuracy: {:.2%}'.format(zero_r_acc)) row = model.evaluate(test_x, test_y, batch_size=FLAGS.batch_size, verbose=0) overall_loss, losses, accuracies = (row[0], row[1:1 + num_nodes], row[num_nodes + 1:]) print('Accuracy: {:.2%}'.format(sum(accuracies) / len(accuracies))) print('Accuracy (excluding first class): {:.2%}'.format( sum(accuracies[1:]) / len(accuracies[1:]))) print('done.')
def Train( self, corpus, test_sampler: typing.Optional[samplers.Sampler] = None, **unused_kwargs, ) -> None: """Locked training. If there are cached epoch checkpoints, the one closest to the target number of epochs will be loaded, and the model will be trained for only the remaining number of epochs, if any. This means that calling this function twice will only actually train the model the first time, and all subsequent calls will be no-ops. This method must only be called when the model is locked. """ del unused_kwargs if self.is_trained: return data_generator = data_generators.TensorflowBatchGenerator( corpus, self.config.training ) tf = self.InitTfGraph() logger = telemetry.TrainingLogger(self.cache.path / "logs") # Create and merge the tensorboard summary ops. merged = tf.compat.v1.summary.merge_all() # training options # TODO(cec): Enable support for multiple optimizers: initial_learning_rate = ( self.config.training.adam_optimizer.initial_learning_rate_micros / 1e6 ) decay_rate = ( self.config.training.adam_optimizer.learning_rate_decay_per_epoch_micros / 1e6 ) # # resume from prior checkpoint ckpt_path, ckpt_paths = None, None if (self.cache.path / "checkpoints" / "checkpoint").exists(): checkpoint_state = tf.train.get_checkpoint_state( self.cache.path / "checkpoints" ) assert checkpoint_state assert checkpoint_state.model_checkpoint_path ckpt_path, ckpt_paths = self.GetParamsPath(checkpoint_state) with tf.compat.v1.Session() as sess, self.dashboard_db.Session() as dbs: dbs.query(dashboard_db.TrainingTelemetry).filter( dashboard_db.TrainingTelemetry.model_id == self.dashboard_model_id ).filter(dashboard_db.TrainingTelemetry.pending == True).delete() tf.compat.v1.global_variables_initializer().run() # Keep all checkpoints. saver = tf.compat.v1.train.Saver( tf.global_variables(), max_to_keep=100, save_relative_paths=True ) # restore model from closest checkpoint. if ckpt_path: app.Log(1, "Restoring checkpoint {}".format(ckpt_path)) saver.restore(sess, ckpt_path) # make sure we don't lose track of other checkpoints if ckpt_paths: saver.recover_last_checkpoints(ckpt_paths) # Offset epoch counts by 1 so that they are in the range [1..n] current_epoch = sess.run(self.epoch) + 1 max_epoch = self.config.training.num_epochs + 1 # Per-epoch training loop. for epoch_num in range(current_epoch, max_epoch): logger.EpochBeginCallback() # decay and set learning rate new_learning_rate = initial_learning_rate * ( (float(100 - decay_rate) / 100.0) ** (epoch_num - 1) ) sess.run(tf.compat.v1.assign(self.learning_rate, new_learning_rate)) sess.run(tf.compat.v1.assign(self.epoch, epoch_num)) # TODO(cec): refactor data generator to a Python generator. data_generator.CreateBatches() app.Log(1, "Epoch %d/%d:", epoch_num, self.config.training.num_epochs) state = sess.run(self.initial_state) # Per-batch inner loop. bar = progressbar.ProgressBar(max_value=data_generator.num_batches) last_log_time = time.time() for i in bar(range(data_generator.num_batches)): x, y = data_generator.NextBatch() feed = {self.input_data: x, self.targets: y} for j, (c, h) in enumerate(self.initial_state): feed[c], feed[h] = state[j].c, state[j].h summary, loss, state, _ = sess.run( [merged, self.loss, self.final_state, self.train_op], feed ) # Periodically write progress to tensorboard. if i % FLAGS.clgen_tf_backend_tensorboard_summary_step_count == 0: step = (epoch_num - 1) * data_generator.num_batches + i self.summary_writer.add_summary(summary, step) # Add telemetry database entry. This isn't committed until the end # of the epoch, when the checkpoint is created. now = time.time() duration_ns = int((now - last_log_time) * 1e6) dbs.add( dashboard_db.TrainingTelemetry( model_id=self.dashboard_model_id, epoch=epoch_num, step=step, training_loss=loss, learning_rate=new_learning_rate, ns_per_batch=int(duration_ns) / FLAGS.clgen_tf_backend_tensorboard_summary_step_count, ) ) last_log_time = now dbs.commit() # Log the loss and delta. app.Log(1, "Loss: %.6f.", loss) # Save after every epoch. start_time = time.time() global_step = epoch_num checkpoint_prefix = self.cache.path / "checkpoints" / "checkpoint" checkpoint_path = saver.save( sess, checkpoint_prefix, global_step=global_step ) app.Log( 1, "Saved checkpoint %s in %s ms.", checkpoint_path, humanize.Commas(int((time.time() - start_time) * 1000)), ) dbs.query(dashboard_db.TrainingTelemetry).filter( dashboard_db.TrainingTelemetry.pending == True ).update({"pending": False}) dbs.commit() assert pathlib.Path( f"{checkpoint_prefix}-{global_step}.index" ).is_file() assert pathlib.Path(f"{checkpoint_prefix}-{global_step}.meta").is_file() logger.EpochEndCallback(epoch_num, loss) # If we have a sampler that we can use at the end of epochs, then # This is confusing logic! Consider a refactor to simplify things. if test_sampler: break else: return if test_sampler and FLAGS.clgen_per_epoch_test_samples > 0: self._EndOfEpochTestSample(corpus, test_sampler, step, epoch_num) self.Train(corpus, test_sampler)
def main(argv): """Main entry point.""" if len(argv) > 1: raise app.UsageError("Unknown arguments: '{}'.".format(' '.join( argv[1:]))) model_dir = pathlib.Path(FLAGS.reachability_model_dir) model_dir.mkdir(parents=True, exist_ok=True) (model_dir / 'logs').mkdir(exist_ok=True) (model_dir / 'checkpoints').mkdir(exist_ok=True) logging.info('Generating graphs dataset ...') data = MakeReachabilityDataset(FLAGS.reachability_num_training_graphs + FLAGS.reachability_num_testing_graphs) training_data = reachability_pb2.ReachabilityDataset() training_data.entry.extend( data.entry[:FLAGS.reachability_num_training_graphs]) pbutil.ToFile(training_data, model_dir / 'training_data.pbtxt') testing_data = reachability_pb2.ReachabilityDataset() testing_data.entry.extend( data.entry[FLAGS.reachability_num_training_graphs:]) pbutil.ToFile(testing_data, model_dir / 'testing_data.pbtxt') logging.info('Number of training examples: %s.', humanize.intcomma(len(training_data.entry))) logging.info('Number of testing examples: %s.', humanize.intcomma(len(testing_data.entry))) n = FLAGS.reachability_num_nodes sequence_length = GetSequenceLength(FLAGS.reachability_num_nodes) logging.info('Using sequence length %s.', humanize.intcomma(sequence_length)) seqs = [ControlFlowGraphToSequence(entry.graph) for entry in data.entry] text = '\n'.join(seqs) logging.info('Deriving atomizer from %s chars.', humanize.intcomma(len(text))) atomizer = atomizers.AsciiCharacterAtomizer.FromText(text) logging.info('Vocabulary size: %s.', humanize.intcomma(len(atomizer.vocab))) with open(model_dir / 'atomizer.pkl', 'wb') as f: pickle.dump(atomizer, f) logging.info('Pickled atomizer to %s.', model_dir / 'atomizer.pkl') x, y = ProtosToModelData(training_data, sequence_length, atomizer) logging.info('Training data: x %s, y[%s] %s', x.shape, len(y), y[0].shape) test_x, test_y = ProtosToModelData(testing_data, sequence_length, atomizer) logging.info('Testing data: x %s, y[%s] %s', test_x.shape, len(test_y), test_y[0].shape) num_uniq_seqs = len(set(seqs)) logging.info('Unique sequences: %s of %s (%.2f %%)', humanize.intcomma(num_uniq_seqs), humanize.intcomma(len(seqs)), (num_uniq_seqs / len(seqs)) * 100) num_uniq_labels = len( set([''.join(str(x) for x in e.reachable) for e in data.entry])) logging.info('Unique labels: %s of %s (%.2f %%)', humanize.intcomma(num_uniq_labels), humanize.intcomma(len(seqs)), (num_uniq_labels / len(seqs)) * 100) np.random.seed(FLAGS.reachability_model_seed) random.seed(FLAGS.reachability_model_seed) logging.info('Building Keras model ...') model = BuildKerasModel(sequence_length=sequence_length, num_classes=n, lstm_size=FLAGS.lstm_size, num_layers=FLAGS.num_layers, dnn_size=FLAGS.dnn_size, atomizer=atomizer) model_json = model.to_json() with open(model_dir / 'model.json', 'w') as f: f.write(model_json) logging.info('Wrote model to %s', model_dir / 'model.json') logging.info('Training model ...') def OnEpochEnd(epoch, logs): """End-of-epoch model evaluate.""" del logs logging.info('Evaluating model at epoch %d', epoch) # score, accuracy row = model.evaluate(test_x, test_y, batch_size=FLAGS.batch_size, verbose=0) overall_loss, losses, accuracies = row[0], row[1:1 + n], row[n + 1:] logging.info('Accuracy (excluding first class): %.2f %%', (sum(accuracies[1:]) / len(accuracies[1:])) * 100) logger = telemetry.TrainingLogger(logdir=model_dir / 'logs') model.fit( x, y, epochs=FLAGS.num_epochs, batch_size=FLAGS.batch_size, verbose=True, shuffle=True, callbacks=[ keras.callbacks.ModelCheckpoint(str(model_dir / 'checkpoints') + '/weights_{epoch:03d}.hdf5', verbose=1, mode="min", save_best_only=False), keras.callbacks.LambdaCallback(on_epoch_end=OnEpochEnd), logger.KerasCallback(keras), ]) for i in range(5): outs = FlattenModelOutputs(model.predict(np.array([x[i]]))) logging.info('outs: %s', outs) logging.info('clamped: %s', np.rint(outs).astype(np.int32)) logging.info('true: %s', FlattenModelData(y, i)) logging.info('') logging.info('done')