def main(params):
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_epochs', type=int, default=300, help='Number of epochs to train for')
    parser.add_argument('--epoch_start_i', type=int, default=0, help='Start counting epochs from this number')
    parser.add_argument('--checkpoint_step', type=int, default=5, help='How often to save checkpoints (epochs)')
    parser.add_argument('--learning_rate', type=float, default=0.01, help='learning rate used for train')
    parser.add_argument('--cuda', type=str, default='0', help='GPU ids used for training')
    parser.add_argument('--save_model_path', type=str, default=None, help='path to save model')
    parser.add_argument('--pretrained_model_path', type=str, default=None, help='path to pretrained model')
    args = parser.parse_args(params)

    # create dataset and dataloader
    dataloader_train = DataLoader(train(input_transform, target_transform),
                                  num_workers=1, batch_size=2, shuffle=True)

    # build model
    os.environ['CUDA_VISIBLE_DEVICES'] = args.cuda
    model = DANet(nclass=2, backbone='resnet50', aux=False, se_loss=False)
    model = model.cuda()

    # build optimizer
    optimizer = torch.optim.RMSprop(model.parameters(), args.learning_rate)

    # load pretrained model if exists
    if args.pretrained_model_path is not None:
        print('load model from %s ...' % args.pretrained_model_path)
        model.module.load_state_dict(torch.load(args.pretrained_model_path))
        print('Done!')

    # train
    train(args, model, optimizer, dataloader_train)
def train_input_fn():
    ds = dataset.train(data_path)
    ds = ds.shuffle(buffer_size=50000)
    ds = ds.take(5000)  # just to speed up training
    ds = ds.batch(params['batch_size'])
    ds = ds.repeat(params['nb_epochs'])
    return ds
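# --- Hedged sketch (assumed, not taken from any of the projects in this listing) ---
# Most of the TensorFlow snippets here rely on a dataset.py helper in the style of
# the official TensorFlow MNIST example, where dataset.train(data_dir) returns a
# tf.data.Dataset of (image, label) pairs. The stand-in below only shows that
# interface; the zero arrays are placeholders for the real MNIST parsing.
import numpy as np
import tensorflow as tf


def train(data_dir):
    # A real helper would download and parse the MNIST files found in data_dir.
    images = np.zeros((60000, 784), dtype=np.float32)
    labels = np.zeros((60000,), dtype=np.int32)
    return tf.data.Dataset.from_tensor_slices((images, labels))


def test(data_dir):
    images = np.zeros((10000, 784), dtype=np.float32)
    labels = np.zeros((10000,), dtype=np.int32)
    return tf.data.Dataset.from_tensor_slices((images, labels))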
def __init__(self, backend):
    self.train_data = dataset.train("/tmp/mnist_data")
    self.test_data = dataset.test("/tmp/mnist_data")
    self.backend = backend
    self.train_images = np.reshape(
        self.train_data['images'][:TRAIN_SIZE],
        (-1, IMAGE_SIZE, IMAGE_SIZE, IMAGE_DEPTH))
    self.train_labels = self.train_data['labels'][:TRAIN_SIZE]
    self.test_images = np.reshape(
        self.test_data['images'][:TRAIN_SIZE],
        (-1, IMAGE_SIZE, IMAGE_SIZE, IMAGE_DEPTH))
    self.test_labels = self.test_data['labels'][:TRAIN_SIZE]
    if backend == 'gpu':
        self.device = "/device:GPU:0"
    else:
        assert backend == 'cpu', 'Invalid backend specified: %s' % backend
        self.device = "/cpu:0"
    print("Creating model")
    self.model = MNIST.create_model()
def main(_):
    print('Loading dataset')
    mnist = input_data.read_data_sets(FLAGS.data_dir)
    print('%d train images' % mnist.train.num_examples)
    print('%d test images' % mnist.test.num_examples)

    batch_size = 128
    max_steps = 10000

    train_ds = dataset.train('/tmp/mnist')
    train_ds = train_ds.shuffle(buffer_size=50000)
    train_ds = train_ds.batch(batch_size)
    train_ds = train_ds.repeat(1)
    x, y = train_ds.make_one_shot_iterator().get_next()

    logits = build_model(x)
    cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=y, logits=logits)
    loss = tf.reduce_mean(cross_entropy)
    train_op = tf.train.AdamOptimizer(1e-4).minimize(loss)

    correct_prediction = tf.cast(
        tf.equal(tf.argmax(logits, 1), tf.cast(y, tf.int64)), tf.float32)
    accuracy = tf.reduce_mean(correct_prediction)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(max_steps):
            _, train_loss, train_acc = sess.run([train_op, loss, accuracy])
            if i % 10 == 0:
                print('step %d: train_loss=%f train_acc=%g' % (i, train_loss, train_acc))
def train_input_fn():
    ds = dataset.train(data_dir)
    ds = ds.cache()
    ds = ds.shuffle(buffer_size=50000)
    ds = ds.batch(batch_size)
    ds = ds.repeat(1)
    return ds
def train_data(params):
    batch_size = params['batch_size']
    data_dir = params['data_dir']
    data = dataset.train(data_dir)
    data = data.cache().repeat().shuffle(buffer_size=50000)
    data = data.batch(batch_size, drop_remainder=True)  # drop??
    return data
def main(unused):
    if FLAGS.run_gpu:
        backend = "/device:GPU:0"
    else:
        backend = "/cpu:0"

    mnist_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn,
                                              model_dir=FLAGS.model_dir,
                                              params={
                                                  'backend': backend,
                                              })

    if FLAGS.mode == 'train' or FLAGS.mode == 'both':
        ds = dataset.train(FLAGS.data_dir)
        # Train the model
        train_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={"x": ds['images']},
            y=ds['labels'],
            batch_size=FLAGS.batch_size,
            num_epochs=FLAGS.train_epochs,
            shuffle=True)
        mnist_classifier.train(input_fn=train_input_fn, steps=200)

    if FLAGS.mode == 'predict' or FLAGS.mode == 'both':
        ds = dataset.test(FLAGS.data_dir)
        # Predict test set
        pred_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={"x": ds['images']}, shuffle=False)
        mnist_classifier.predict(input_fn=pred_input_fn)
def train_input_fn():
    # When choosing shuffle buffer sizes, larger sizes result in better
    # randomness, while smaller sizes use less memory. MNIST is a small
    # enough dataset that we can easily shuffle the full epoch.
    ds = dataset.train(FLAGS.data_dir)
    ds = ds.cache().shuffle(buffer_size=50000).batch(
        FLAGS.batch_size).repeat(FLAGS.train_epochs)
    return ds
def train_input_fn():
    # When choosing shuffle buffer sizes, larger sizes result in better
    # randomness, while smaller sizes use less memory. MNIST is a small
    # enough dataset that we can easily shuffle the full epoch.
    ds = dataset.train(FLAGS.data_dir)
    # ds = ds.cache().shuffle(buffer_size=50000).batch(FLAGS.batch_size).repeat(
    ds = ds.cache().batch(FLAGS.batch_size).repeat(FLAGS.train_epochs)
    (images, labels) = ds.make_one_shot_iterator().get_next()
    return (images, labels)
def eval_data(params):
    batch_size = params['batch_size']
    data_dir = params['data_dir']
    data = dataset.train(data_dir)
    # Take out top several samples from test data to make the predictions.
    data = data.cache().repeat().shuffle(buffer_size=50000)  # shuffle too slow ??
    data = data.batch(batch_size, drop_remainder=True)
    return data
def train_input_fn():
    # When choosing shuffle buffer sizes, larger sizes result in better
    # randomness, while smaller sizes use less memory. MNIST is a small
    # enough dataset that we can easily shuffle the full epoch.
    ds = dataset.train(DATA_DIR)
    ds = ds.cache().shuffle(buffer_size=50000).batch(FLAGS.batch_size).repeat(FLAGS.num_epochs)
    (images, labels) = tf.compat.v1.data.make_one_shot_iterator(ds).get_next()
    (cimages, clabels) = tf.compat.v1.data.make_one_shot_iterator(ds).get_next()
    count_epochs(cimages)
    return (images, labels)
def train_input_fn(params): """train_input_fn defines the input pipeline used for training.""" batch_size = params["batch_size"] data_dir = params["data_dir"] # Retrieves the batch size for the current shard. The # of shards is # computed according to the input pipeline deployment. See # `tf.contrib.tpu.RunConfig` for details. ds = dataset.train(data_dir).cache().repeat().shuffle( buffer_size=50000).batch(batch_size, drop_remainder=True) return ds
def train_input_fn(params):
    batch_size = params["batch_size"]
    data_dir = params["data_dir"]
    # Retrieves the batch size for the current shard. The # of shards is
    # computed according to the input pipeline deployment. See
    # `tf.contrib.tpu.RunConfig` for details.
    ds = dataset.train(data_dir).cache().repeat().shuffle(
        buffer_size=50000).apply(
            tf.contrib.data.batch_and_drop_remainder(batch_size))
    images, labels = ds.make_one_shot_iterator().get_next()
    return images, labels
def train():
    questions = list(questions_from_dataset(dataset.train()))
    random.shuffle(questions)
    test_questions = list(questions_from_dataset(dataset.test()))
    random.shuffle(test_questions)
    i = 0
    for i, batch in enumerate(iterate_batches(questions, size=20)):
        n.train(batch)
        if i % 10 == 0:
            n.save(i)
def run_mnist_eager(): """Run MNIST training and eval loop in eager mode. """ data_dir = '/tmp/tensorflow/mnist/input_data' + str(ddl.rank()) model_dir = '/tmp/tensorflow/mnist/checkpoints/' + str(ddl.rank()) + '/' # Delete model dir if os.path.isdir(model_dir) and ddl.local_rank() == 0: shutil.rmtree(model_dir) data_format = 'channels_first' # Load the datasets train_ds, _ = mnist_dataset.train(data_dir, (1, 28, 28), label_int=True) train_ds = train_ds.shard(ddl.size(), ddl.rank()).shuffle(60000).batch(batch_size) test_ds, _ = mnist_dataset.test(data_dir, (1, 28, 28), label_int=True) test_ds = test_ds.batch(batch_size) # Create the model and optimizer model = create_model(data_format) optimizer = tf.train.MomentumOptimizer(0.01, 0.5) train_dir = None test_dir = None summary_writer = tf.contrib.summary.create_file_writer(train_dir, flush_millis=10000) test_summary_writer = tf.contrib.summary.create_file_writer( test_dir, flush_millis=10000, name='test') # Create and restore checkpoint (if one exists on the path) checkpoint_prefix = os.path.join(model_dir, 'ckpt-r' + str(ddl.rank())) step_counter = tf.train.get_or_create_global_step() checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer, step_counter=step_counter) # Restore variables on creation if a checkpoint exists. checkpoint.restore(tf.train.latest_checkpoint(model_dir)) # Train and evaluate for a set number of epochs. for _ in range(train_epochs): start = time.time() with summary_writer.as_default(): train(model, optimizer, train_ds, step_counter, 10) end = time.time() if ddl.rank() == 0: print('\nTrain time for epoch #%d (%d total steps): %f' % (checkpoint.save_counter.numpy() + 1, step_counter.numpy(), end - start)) with test_summary_writer.as_default(): test(model, test_ds) checkpoint.save(checkpoint_prefix)
def main(_):
    tfe.enable_eager_execution()

    (device, data_format) = ('/gpu:0', 'channels_first')
    if FLAGS.no_gpu or tfe.num_gpus() <= 0:
        (device, data_format) = ('/cpu:0', 'channels_last')
    print('Using device %s, and data format %s.' % (device, data_format))

    # Load the datasets
    train_ds = dataset.train(FLAGS.data_dir).shuffle(60000).batch(
        FLAGS.batch_size)
    test_ds = dataset.test(FLAGS.data_dir).batch(FLAGS.batch_size)

    # Create the model and optimizer
    model = mnist.Model(data_format)
    optimizer = tf.train.MomentumOptimizer(FLAGS.lr, FLAGS.momentum)

    if FLAGS.output_dir:
        # Create directories to which summaries will be written
        # tensorboard --logdir=<output_dir>
        # can then be used to see the recorded summaries.
        train_dir = os.path.join(FLAGS.output_dir, 'train')
        test_dir = os.path.join(FLAGS.output_dir, 'eval')
        tf.gfile.MakeDirs(FLAGS.output_dir)
    else:
        train_dir = None
        test_dir = None

    summary_writer = tf.contrib.summary.create_file_writer(train_dir, flush_millis=10000)
    test_summary_writer = tf.contrib.summary.create_file_writer(
        test_dir, flush_millis=10000, name='test')

    checkpoint_prefix = os.path.join(FLAGS.checkpoint_dir, 'ckpt')
    step_counter = tf.train.get_or_create_global_step()
    checkpoint = tfe.Checkpoint(model=model, optimizer=optimizer,
                                step_counter=step_counter)
    # Restore variables on creation if a checkpoint exists.
    checkpoint.restore(tf.train.latest_checkpoint(FLAGS.checkpoint_dir))

    # Train and evaluate for 10 epochs.
    with tf.device(device):
        for _ in range(10):
            start = time.time()
            with summary_writer.as_default():
                train(model, optimizer, train_ds, step_counter, FLAGS.log_interval)
            end = time.time()
            print('\nTrain time for epoch #%d (%d total steps): %f' %
                  (checkpoint.save_counter.numpy() + 1,
                   step_counter.numpy(),
                   end - start))
            with test_summary_writer.as_default():
                test(model, test_ds)
            checkpoint.save(checkpoint_prefix)
def train_input_fn(): """Prepare data for training.""" # When choosing shuffle buffer sizes, larger sizes result in better # randomness, while smaller sizes use less memory. MNIST is a small # enough dataset that we can easily shuffle the full epoch. ds = dataset.train(flags_obj.data_dir) ds = ds.cache().shuffle(buffer_size=50000).batch(flags_obj.batch_size) # Iterate through the dataset a set number (`epochs_between_evals`) of times # during each training session. ds = ds.repeat(flags_obj.epochs_between_evals) return ds
def train_input_fn(data_dir, batch_size=100):
    """Prepare data for training."""
    # When choosing shuffle buffer sizes, larger sizes result in better
    # randomness, while smaller sizes use less memory. MNIST is a small
    # enough dataset that we can easily shuffle the full epoch.
    ds = dataset.train(data_dir)
    ds = ds.cache().shuffle(buffer_size=50000).batch(batch_size=batch_size)

    # Iterate through the dataset a set number of times
    # during each training session.
    ds = ds.repeat(40)
    features = ds.make_one_shot_iterator().get_next()
    return {'pixels': features[0]}, features[1]
def main(model_uri, data_path):
    tf_graph = tf.Graph()
    with tf.Session(graph=tf_graph) as sess:
        with tf_graph.as_default():
            # Use the MNIST dataset that was used for training
            ds = dataset.train(data_path)
            next_op = tf.data.make_one_shot_iterator(ds).get_next()

            # Load the MLflow model
            signature_def = mlflow.tensorflow.load_model(model_uri=model_uri, tf_sess=sess)
            input_tensors = {
                input_signature.name: tf_graph.get_tensor_by_name(input_signature.name)
                for _, input_signature in signature_def.inputs.items()
            }
            output_tensors = {
                output_signature.name: tf_graph.get_tensor_by_name(output_signature.name)
                for _, output_signature in signature_def.outputs.items()
            }

            for _ in range(10):
                # This uses a 2-step process:
                # 1. Run `next_op` to fetch the next image from the dataset
                # 2. Use a feed dictionary to run the prediction
                # This is for demonstration purposes only and should never be
                # used in a real system because it is very inefficient.
                image, label = sess.run(next_op)
                feed_dict = {
                    input_tensors['images:0']: np.expand_dims(image, axis=0)
                }
                pred = sess.run(output_tensors['ArgMax:0'], feed_dict=feed_dict)[0]
                correct = 'SAME' if label == pred else 'NOT_SAME'
                print(label, pred, correct)
def run_mnist_eager(flags_obj):
    """Run MNIST training and eval loop in eager mode.

    Args:
      flags_obj: An object containing parsed flag values.
    """
    # Soft placement
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    tfe.enable_eager_execution(config=config)

    model_helpers.apply_clean(flags.FLAGS)

    # Automatically determine device and data_format
    (device, data_format) = ('/gpu:0', 'channels_first')
    if flags_obj.no_gpu or not tf.test.is_gpu_available():
        (device, data_format) = ('/cpu:0', 'channels_last')
    # If data_format is defined in FLAGS, overwrite automatically set value.
    if flags_obj.data_format is not None:
        data_format = flags_obj.data_format
    print('Using device %s, and data format %s.' % (device, data_format))

    # Load the datasets
    train_ds = mnist_dataset.train(flags_obj.data_dir).shuffle(60000).batch(
        flags_obj.batch_size)
    test_ds = mnist_dataset.test(flags_obj.data_dir).batch(flags_obj.batch_size)

    # Create the model and optimizer
    model = model_lib.create_model(data_format)
    optimizer = tf.train.MomentumOptimizer(flags_obj.lr, flags_obj.momentum)

    # Print model summary
    print(model.summary())

    # Create file writers for writing TensorBoard summaries.
    if flags_obj.output_dir:
        # Create directories to which summaries will be written
        # tensorboard --logdir=<output_dir>
        # can then be used to see the recorded summaries.
        train_dir = os.path.join(flags_obj.output_dir, 'train')
        test_dir = os.path.join(flags_obj.output_dir, 'eval')
        tf.gfile.MakeDirs(flags_obj.output_dir)
    else:
        train_dir = None
        test_dir = None

    summary_writer = tf.contrib.summary.create_file_writer(train_dir, flush_millis=10000)
    test_summary_writer = tf.contrib.summary.create_file_writer(
        test_dir, flush_millis=10000, name='test')

    # Create and restore checkpoint (if one exists on the path)
    checkpoint_prefix = os.path.join(flags_obj.model_dir, 'ckpt')
    step_counter = tf.train.get_or_create_global_step()
    checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer,
                                     step_counter=step_counter)
    # Restore variables on creation if a checkpoint exists.
    checkpoint.restore(tf.train.latest_checkpoint(flags_obj.model_dir))

    # Train and evaluate for a set number of epochs.
    with tf.device(device):
        for _ in range(flags_obj.train_epochs):
            start = time.time()
            with summary_writer.as_default():
                train(model, optimizer, train_ds, step_counter, flags_obj.log_interval)
            end = time.time()
            # Note time taken
            print('\nTrain time for epoch #%d (%d total steps): %f' %
                  (checkpoint.save_counter.numpy() + 1,
                   step_counter.numpy(),
                   end - start))
            with test_summary_writer.as_default():
                test(model, test_ds)
            checkpoint.save(checkpoint_prefix)
import tensorflow as tf
tf.enable_eager_execution()
tfe = tf.contrib.eager

import mnist
import dataset  # download dataset.py file

dataset_train = dataset.train('./datasets').shuffle(60000).repeat(4).batch(32)


def loss(model, x, y):
    prediction = model(x)
    return tf.losses.sparse_softmax_cross_entropy(labels=y, logits=prediction)


def grad(model, inputs, targets):
    with tf.GradientTape() as tape:
        loss_value = loss(model, inputs, targets)
    return tape.gradient(loss_value, model.variables)


optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)

x, y = iter(dataset_train).next()
print("Initial loss: {:.3f}".format(loss(model, x, y)))

# Training loop
for (i, (x, y)) in enumerate(dataset_train):
    # Loop body assumed (the snippet breaks off here): apply the gradients
    # computed by grad() above with the optimizer defined above.
    grads = grad(model, x, y)
    optimizer.apply_gradients(zip(grads, model.variables))
    if i % 200 == 0:
        print("Loss at step {:04d}: {:.3f}".format(i, loss(model, x, y)))
from keras.models import Sequential
from keras import backend as K
#from sklearn.model_selection import train_test_split
import numpy as np
import dataset
import os

batch_size = 64
num_classes = 62
epochs = 10
img_rows, img_cols = 28, 28

print('Start loading data.')

# To be modified
folder_path = os.getcwd()
train_dataset = dataset.train(os.path.join(folder_path, 'emnist'))
test_dataset = dataset.test(os.path.join(folder_path, 'emnist'))
print('Data has been loaded.')

# Not sure whether the reshapes of the x and y tensors holding the train and test sets are needed
if K.image_data_format() == 'channels_first':
    #x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    #x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    #x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    #x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

"""print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')"""
epochs = 12

# input image dimensions
img_rows, img_cols = 28, 28

# data_dir
data_dir = "/tmp/mnist_convnet_model_data" + str(ddl.rank())

input_shape = ()
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)

# the data, split between train and test sets
(train_set, num_of_train_imgs) = dataset.train(data_dir, input_shape)
train_set = train_set.shard(ddl.size(), ddl.rank())
train_set = train_set.cache().shuffle(buffer_size=1000).batch(batch_size).repeat()

(eval_set, num_of_test_imgs) = dataset.test(data_dir, input_shape)
eval_full = eval_set
eval_set = eval_set.shard(ddl.size(), ddl.rank())
eval_set = eval_set.batch(batch_size).repeat()

num_of_all_test_imgs = num_of_test_imgs
num_of_train_imgs /= ddl.size()
num_of_test_imgs /= ddl.size()

model = Sequential()
model.add(
def main(argv):
    parser = MNISTEagerArgParser()
    flags = parser.parse_args(args=argv[1:])

    # TF v1.7
    tfe.enable_eager_execution()

    # Automatically determine device and data_format
    (device, data_format) = ('/gpu:0', 'channels_first')
    if flags.no_gpu or tfe.num_gpus() <= 0:
        (device, data_format) = ('/cpu:0', 'channels_last')
    # If data_format is defined in FLAGS, overwrite automatically set value.
    if flags.data_format is not None:
        data_format = flags.data_format

    # Log Info
    print("-" * 64)
    print("TEST INFO - EAGER")
    print("-" * 64)
    print("TF version:\t {}".format(tf.__version__))
    print("Eager execution:\t {}".format(tf.executing_eagerly()))
    print("Dataset:\t MNIST")
    print("Model:\t CNN")
    print('Device:\t {}'.format(device))
    if data_format == 'channels_first':
        print("Data format:\t NCHW (channel first)")
    else:
        print("Data format:\t NHWC (channel last)")
    print("=" * 64)

    # Load the datasets
    train_ds = mnist_dataset.train(flags.data_dir).shuffle(60000).batch(
        flags.batch_size)
    test_ds = mnist_dataset.test(flags.data_dir).batch(flags.batch_size)

    # Create the model and optimizer
    # model = create_model(data_format)
    model = MNISTModel(data_format)
    optimizer = tf.train.MomentumOptimizer(flags.lr, flags.momentum)

    # Create file writers for writing TensorBoard summaries.
    if flags.output_dir:
        # Create directories to which summaries will be written
        # tensorboard --logdir=<output_dir>
        # can then be used to see the recorded summaries.
        train_dir = os.path.join(flags.output_dir, 'train')
        test_dir = os.path.join(flags.output_dir, 'eval')
        tf.gfile.MakeDirs(flags.output_dir)
    else:
        train_dir = None
        test_dir = None

    summary_writer = tf.contrib.summary.create_file_writer(train_dir, flush_millis=10000)
    test_summary_writer = tf.contrib.summary.create_file_writer(
        test_dir, flush_millis=10000, name='test')

    # Create and restore checkpoint (if one exists on the path)
    checkpoint_prefix = os.path.join(flags.model_dir, 'ckpt')
    step_counter = tf.train.get_or_create_global_step()
    checkpoint = tfe.Checkpoint(model=model, optimizer=optimizer,
                                step_counter=step_counter)
    # Restore variables on creation if a checkpoint exists.
    checkpoint.restore(tf.train.latest_checkpoint(flags.model_dir))

    # Train and evaluate for a set number of epochs.
    with tf.device(device):
        for _ in range(flags.train_epochs):
            start = time.time()
            with summary_writer.as_default():
                train(model, optimizer, train_ds, step_counter, flags.log_interval)
            end = time.time()
            print('\nTrain time for epoch #%d (%d total steps): %f' %
                  (checkpoint.save_counter.numpy() + 1,
                   step_counter.numpy(),
                   end - start))
            with test_summary_writer.as_default():
                test(model, test_ds)
            checkpoint.save(checkpoint_prefix)
def train_data():
    data = dataset.train(FLAGS.data_dir)
    data = data.cache()
    data = data.batch(FLAGS.batch_size)
    return data
import tensorflow as tf
import dataset

sess = tf.InteractiveSession()

mnist_train = dataset.train("./mnist_data")
mnist_test = dataset.test("./mnist_data")

# https://www.tensorflow.org/guide/datasets
print(mnist_train.output_shapes, mnist_train.output_types)
print(mnist_test.output_shapes, mnist_test.output_types)

batched_train = mnist_train.batch(100)
iterator = batched_train.make_one_shot_iterator()
next_element = iterator.get_next()

x = tf.placeholder(tf.float32, [None, 784])
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

# y is a matrix
y = tf.nn.softmax(tf.matmul(x, W) + b)

# TODO: try [None, 10]
y_ = tf.placeholder(tf.int32, [None])
# TODO: why does this not work?
# y_ = tf.placeholder(tf.int32, [None, 1])
# y_ = tf.placeholder(tf.float32, [None, 10])

# because y is a 2D matrix, note the meaning of reduce_sum
one_hot_y = tf.one_hot(y_, 10)
def main(_):
    # Parameters
    learning_rate = 0.001
    training_iters = FLAGS.num_iterations
    batch_size = 100
    display_step = 1

    # Network Parameters
    n_input = 784  # MNIST data input (img shape: 28*28)
    n_classes = 10  # MNIST total classes (0-9 digits)
    dropout = 0.75  # Dropout, probability to keep units

    ############################################################################
    # Import MNIST data
    ############################################################################
    data_dir = FLAGS.data_dir + str(ddl.local_rank())
    (train_set, num_of_train_imgs) = dataset.train(data_dir, (28, 28, 1), VARTYPE)
    train_set = train_set.shard(ddl.size(), ddl.rank())
    train_set = train_set.batch(batch_size).cache().shuffle(buffer_size=1000).repeat()
    X_train, Y_train = train_set.make_one_shot_iterator().get_next()

    # Construct model
    pred, keep_prob = deepnn(X_train)

    # Define loss and optimizer
    with tf.name_scope('loss'):
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y_train, logits=pred))

    with tf.name_scope('adam_optimizer'):
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, epsilon=1e-4)
        objective = optimizer.minimize(cost)

    # Evaluate model
    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(Y_train, 1))
        correct_prediction = tf.cast(correct_prediction, VARTYPE)
        accuracy = tf.reduce_mean(correct_prediction)

    graph_location = tempfile.mkdtemp()
    print('Saving graph to: %s' % graph_location)
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())

    # Launch the graph
    with tf.Session(config=tf.ConfigProto()) as sess:
        sess.run(tf.global_variables_initializer())
        my_variable = bias_variable([5, 5, 1, 32])
        sess.run(my_variable.initializer)
        step = 1
        # Keep training until reach max iterations
        while step * batch_size < training_iters:
            # Run optimization op (backprop)
            sess.run(objective)
            if step % display_step == 0:
                # Calculate batch loss and accuracy
                loss, acc = sess.run([cost, accuracy])
                print("DDL " + str(ddl.rank()) + "] Iter " + str(step * batch_size) +
                      ", Minibatch Loss= " + "{:.6f}".format(loss) +
                      ", Training Accuracy= " + "{:.5f}".format(acc))
            step += 1
        print("DDL " + str(ddl.rank()) + "] Optimization Finished!")

        # Calculate accuracy for 256 mnist test images
        print("DDL " + str(ddl.rank()) + "] Testing Accuracy:", sess.run(accuracy))
def train(model):
    best_acc = 0
    num_epochs = 60
    loader = DataLoader(train(input_transform, target_transform),
                        num_workers=1, batch_size=4, shuffle=True)
    criterion = nn.CrossEntropyLoss()

    savedir = './save'
    automated_log_path = savedir + "/log.txt"
    modeltxtpath = savedir + "/model.txt"

    with open(modeltxtpath, "w") as myfile:
        myfile.write(str(model))

    optimizer = Adam(model.parameters(), 1e-4, (0.9, 0.999),
                     eps=1e-08, weight_decay=2e-4)  ## scheduler 1
    start_epoch = 1
    lr_updater = lr_scheduler.StepLR(optimizer, 100, 0.1)  ## scheduler 2

    # Note: this only loads initialized weights. If you want to resume a training use "--resume" option!!
    def load_my_state_dict(model, state_dict):  # custom function to load model when not all dict keys are there
        own_state = model.state_dict()
        for name, param in state_dict.items():
            if name not in own_state:
                continue
            own_state[name].copy_(param)
        return model

    for epoch in range(start_epoch, num_epochs + 1):
        print("----- TRAINING - EPOCH", epoch, "-----")
        lr_updater.step()
        epoch_loss = []
        time_train = []
        usedLr = 0

        for param_group in optimizer.param_groups:
            print("LEARNING RATE: ", param_group['lr'])
            usedLr = float(param_group['lr'])

        model.train()
        for step, (images, labels) in enumerate(loader):
            start_time = time.time()
            images = images.cuda()
            labels = labels.cuda()
            inputs = Variable(images)
            targets = Variable(labels)
            #print inputs.size(),targets.size()
            outputs = model(inputs)
            #print outputs.size()
            optimizer.zero_grad()
            loss = criterion(outputs, targets[:, 0])
            loss.backward()
            optimizer.step()
            #print loss.item()
            epoch_loss.append(loss.item())
            time_train.append(time.time() - start_time)
            if step % 100 == 0:
                average = sum(epoch_loss) / len(epoch_loss)
                print('epoch:%f' % epoch, 'step:%f' % step, 'loss:%f' % average)
                with open(automated_log_path, "a") as myfile:
                    myfile.write("\n%d\t\t%d\t\t%.4f" % (epoch, step, average))

        if epoch % 1 == 0 and epoch != 0:
            filename = 'main-' + 'eda' + '-step-' + str(step) + '-epoch-' + str(epoch) + '.pth'
            torch.save(model.state_dict(), './save/model/' + filename)

    return (model)
def main():
    savedir = './save'
    Net = EDANet(NUM_CLASSES)
    Net = Net.cuda()
    train(Net)
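# --- Hedged sketch (assumed, not taken from the EDANet/DANet code above) ---
# The PyTorch snippets build their loaders as
# DataLoader(train(input_transform, target_transform), ...), i.e. `train` is
# also the name of a Dataset class that applies paired transforms to an image
# and its label map. A minimal stand-in with that interface:
from torch.utils.data import Dataset


class train(Dataset):  # lower-case name kept only to match the snippets above
    def __init__(self, input_transform=None, target_transform=None):
        self.input_transform = input_transform
        self.target_transform = target_transform
        self.samples = []  # would be filled with (image, label) pairs loaded from disk

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, index):
        image, label = self.samples[index]
        if self.input_transform is not None:
            image = self.input_transform(image)
        if self.target_transform is not None:
            label = self.target_transform(label)
        return image, label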
def mnist(learning_rate, initializer_mode, num_conv_layers, num_fc_layers):
    if num_conv_layers not in [1, 2]:
        raise ValueError("num_conv_layers should be 1 or 2")
    if num_fc_layers not in [1, 2]:
        raise ValueError("num_fc_layers should be 1 or 2")

    def make_hyperparameter_string(learning_rate, initializer_mode, num_conv_layers, num_fc_layers):
        hyperparameter = "lr_%.e_" % learning_rate
        if initializer_mode == 0:
            hyperparameter += "xavier_constant"
        else:
            hyperparameter += "truncated_normal_constant"
        hyperparameter += "_%d_conv_%d_fc" % (num_conv_layers, num_fc_layers)
        return hyperparameter

    learning_rate = learning_rate
    if initializer_mode == 0:
        weights_initializer = tf.contrib.layers.xavier_initializer()
        biases_initializer = tf.constant_initializer(0.1)
    else:
        weights_initializer = tf.truncated_normal_initializer(stddev=0.1)
        biases_initializer = tf.constant_initializer(0.1)

    logdir = "logs/mnist/" + make_hyperparameter_string(
        learning_rate, initializer_mode, num_conv_layers, num_fc_layers)
    if not tf.gfile.Exists(logdir):
        tf.gfile.MakeDirs(logdir)

    def mnist_net(x):
        endpoints = {}
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            activation_fn=tf.nn.relu,
                            weights_initializer=weights_initializer,
                            biases_initializer=biases_initializer):
            with slim.arg_scope([slim.conv2d, slim.max_pool2d], stride=1, padding="SAME"):
                net = slim.conv2d(x, 32, [5, 5], scope="conv1")
                net = slim.max_pool2d(net, [2, 2], stride=2, scope="pool1")
                endpoints["block1"] = net
                if num_conv_layers == 2:
                    net = slim.conv2d(net, 64, [5, 5], scope="conv2")
                    net = slim.max_pool2d(net, [2, 2], stride=2, scope="pool2")
                    endpoints["block2"] = net
                    net = tf.reshape(net, shape=[-1, 7 * 7 * 64])
                elif num_conv_layers == 1:
                    net = tf.reshape(net, shape=[-1, 14 * 14 * 32])
                if num_fc_layers == 1:
                    logits = slim.fully_connected(net, 10, scope="fc")
                else:
                    logits = slim.stack(net, slim.fully_connected, [1024, 10], scope="fc")
                endpoints["logits"] = logits
        return logits, endpoints

    # ValueError: Variable conv1/weights already exists, disallowed.
    # Did you mean to set reuse=True or reuse=tf.AUTO_REUSE in VarScope? Originally defined at:
    graph = tf.Graph()
    with graph.as_default():
        with tf.name_scope("input"):
            images = tf.placeholder(tf.float32, shape=[None, 784], name="images")
            images_3d = tf.reshape(images, shape=[-1, 28, 28, 1], name="images_3d")
            labels = tf.placeholder(tf.uint8, shape=[None], name="labels")
            onehot_labels = tf.one_hot(indices=labels, depth=10, name="onehot_labels")

        logits, endpoints = mnist_net(images_3d)

        with tf.name_scope("loss"):
            # loss = slim.losses.softmax_cross_entropy(logits, onehot_labels)
            loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels, logits=logits)
            # Keep the next line commented out, otherwise softmax_cross_entropy_loss shows up
            # twice in TensorBoard; the line above presumably already registers the loss once.
            # tf.losses.add_loss(loss)  # Letting TF-Slim know about the additional loss.
            total_loss = tf.losses.get_total_loss(add_regularization_losses=False)
            # tf.add_to_collection('EXTRA_LOSSES', total_loss)

        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(
                labels, tf.cast(tf.argmax(logits, axis=1), tf.uint8))
            accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32))

        with tf.name_scope("optimize"):
            optimizer = tf.train.AdamOptimizer(learning_rate)
            # create_train_op ensures that each time we ask for the loss, the update_ops
            # are run and the gradients being computed are applied too.
            train_op = slim.learning.create_train_op(total_loss, optimizer)

        # batch size 100 works much better and is much more stable than 30
        train_set = dataset.train("MNIST-data").cache().shuffle(
            buffer_size=1000).batch(100).repeat(num_epoch)
        test_set = dataset.test("MNIST-data").cache().batch(30).repeat()
        iterator = train_set.make_one_shot_iterator()
        one_element = iterator.get_next()
        iterator_test = test_set.make_one_shot_iterator()
        one_element_test = iterator_test.get_next()

        init_op = tf.global_variables_initializer()
        log_writer = tf.summary.FileWriter(logdir)

        # summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
        summaries = set()
        for key in endpoints:
            summaries.add(tf.summary.histogram("block/" + key, endpoints[key]))
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))
        for loss in tf.get_collection(tf.GraphKeys.LOSSES):
            summaries.add(tf.summary.scalar(loss.op.name, loss))
        # for loss in tf.get_collection('EXTRA_LOSSES'):
        #     summaries.add(tf.summary.scalar(loss.op.name, loss))
        # summaries.add(tf.summary.scalar("accuracy", accuracy))
        accuracy_train_summary_op = tf.summary.scalar("accuracy_train", accuracy)
        accuracy_test_summary_op = tf.summary.scalar("accuracy_test", accuracy)
        summaries.add(tf.summary.image("image", images_3d, 4))
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        step = 0
        with tf.Session() as sess:
            log_writer.add_graph(sess.graph)
            sess.run(init_op)
            try:
                while True:
                    images_, labels_ = sess.run(one_element)
                    sess.run(train_op, feed_dict={images: images_, labels: labels_})
                    if step % 10 == 0:
                        summary_, accuracy_train_summary = sess.run(
                            [summary_op, accuracy_train_summary_op],
                            feed_dict={images: images_, labels: labels_})
                        images_, labels_ = sess.run(one_element_test)
                        accuracy_test_summary = sess.run(
                            accuracy_test_summary_op,
                            feed_dict={images: images_, labels: labels_})
                        log_writer.add_summary(summary_, step)
                        log_writer.add_summary(accuracy_train_summary, step)
                        log_writer.add_summary(accuracy_test_summary, step)
                    step += 1
            except tf.errors.OutOfRangeError:
                print("Finished")
            log_writer.close()
def grad(model, input):
    with tf.GradientTape() as tape:
        loss_value = loss(model, input)
    return tape.gradient(loss_value, model.variables)


optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)

checkpoint_dir = 'checkpoint/'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
root = tfe.Checkpoint(optimizer=optimizer,
                      model=model,
                      optimizer_step=tf.train.get_or_create_global_step())
root.restore(tf.train.latest_checkpoint(checkpoint_dir))

number_of_test_images = 10
test_data = dataset.train('./datasets')
test_data = test_data.batch(number_of_test_images)
iterator = test_data.make_one_shot_iterator()
next_element, _ = iterator.get_next()
input = tf.reshape(next_element, [next_element.shape[0], 28, 28, 1])  # (samples, rows, cols, channels)

print("Final loss: {:.3f}".format(loss(model, input)))

output = model(input)
input_images = input.numpy()
output_images = output.numpy()
for i in range(input_images.shape[0]):
    input_image = np.squeeze(input_images[i, :, :, :])
    output_image = np.squeeze(output_images[i, :, :, :])
    print_image = np.hstack((input_image, output_image))
    file_name = 'autoencoder_reconstructed_imgs/' + str(i) + '.png'
    scipy.misc.imsave(file_name, print_image)