def cifar10_model(learning_rate, objectiveFunc, hparam, act_func):
    tf.reset_default_graph()
    sess = tf.InteractiveSession(config=tf.ConfigProto(
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.4)))

    # Define input placeholders
    # images_placeholder - x
    x = tf.placeholder(tf.float32, shape=[None, IMAGE_PIXELS], name='images')
    # CIFAR-10 images are RGB, so the flat IMAGE_PIXELS vector reshapes to 32x32x3
    x_image = tf.reshape(x, [-1, 32, 32, 3])
    tf.summary.image('input', x_image, 3)

    # labels_placeholder - y_
    y_ = tf.placeholder(tf.int64, shape=[None], name='image-labels')
    keep_prob = tf.placeholder(tf.float32)

    y = tf.one_hot(y_, 10, 1.0, 0.0, -1)

    h1, W1, B1 = fc_layer(x, IMAGE_PIXELS, 100, act_func, "h1")
    logit, W2, B2 = logits(h1, 100, 10)
    Y = tf.nn.softmax(logit)

    # select the loss function
    if objectiveFunc == "mean_sq_err":
        with tf.name_scope("mean_sq_err"):
            mean_sq_err = tf.reduce_mean(
                tf.contrib.keras.losses.mean_squared_error(y, Y))
            tf.summary.scalar("mean_sq_err", mean_sq_err)
            loss = mean_sq_err
    elif objectiveFunc == "L2_norm":
        with tf.name_scope("L2_norm"):
            xent = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
                logits=logit, labels=y),
                                  name="xent")
            L2_lambda = 0.05
            L2_norm = xent + \
                      L2_lambda * (tf.nn.l2_loss(W1) + tf.nn.l2_loss(B1) + tf.nn.l2_loss(W2) + tf.nn.l2_loss(B2))
            tf.summary.scalar("L2_norm", L2_norm)
            loss = L2_norm
    else:
        with tf.name_scope("xent"):
            xent = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
                logits=logit, labels=y),
                                  name="xent")
            tf.summary.scalar("xent", xent)
            loss = xent

    with tf.name_scope("train"):
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar("accuracy", accuracy)

    summ = tf.summary.merge_all()

    sess.run(tf.global_variables_initializer())
    writer_train = tf.summary.FileWriter(LOGDIR + hparam + "_train")
    writer_train.add_graph(sess.graph)
    writer_test = tf.summary.FileWriter(LOGDIR + hparam + "_test")
    writer_test.add_graph(sess.graph)

    num_epochs = 200
    # training accuracy
    list_test_acc = list()

    # Generate input data batches
    zipped_data = zip(data_sets['images_train'], data_sets['labels_train'])
    # batch size 100; the step bound is a generous upper limit on the
    # 500 batches actually drawn per epoch
    batches = data_helpers.gen_batch(list(zipped_data), 100,
                                     500 * 100 * num_epochs)

    for k in range(num_epochs):
        print("epoch %d" % k)
        for i in range(500):
            batch = next(batches)
            batch_xs, batch_ys = zip(*batch)
            feed_dict = {x: batch_xs, y_: batch_ys}
            if i % 100 == 0:
                [train_accuracy, s_train] = sess.run([accuracy, summ],
                                                     feed_dict=feed_dict)
                writer_train.add_summary(s_train, k * 500 + i)
                [test_accuracy,
                 s_test] = sess.run([accuracy, summ],
                                    feed_dict={
                                        x: data_sets['images_test'],
                                        y_: data_sets['labels_test']
                                    })
                writer_test.add_summary(s_test, k * 500 + i)
                print("train accuracy: " + str(train_accuracy))
                print("test accuracy: " + str(test_accuracy))
            sess.run(train_step, feed_dict=feed_dict)
        test_acc = accuracy.eval(feed_dict={
            x: data_sets['images_test'],
            y_: data_sets['labels_test']
        })
        list_test_acc.append(test_acc)
        if k > 10 and np.mean(list_test_acc[-10:-5]) > np.mean(
                list_test_acc[-5:]):
            print("Seems like it starts to overfit, aborting the training")
            break
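
# A hypothetical driver for the function above (a sketch; the learning rate
# and hparam string are illustrative, not from the original source):
if __name__ == "__main__":
    for objective in ("xent", "mean_sq_err", "L2_norm"):
        cifar10_model(learning_rate=1e-3,
                      objectiveFunc=objective,
                      hparam="lr_1e-3," + objective,
                      act_func=tf.nn.relu)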
# -----------------------------------------------------------------------------

with tf.Session() as sess:
  # Initialize variables and create summary-writer
  sess.run(tf.global_variables_initializer())
  # Create a summary writer so that log data is periodically saved to disk.
  summary_writer = tf.summary.FileWriter(logdir, sess.graph)

  # Generate input data batches
  # Responsible for generating batches of input data. Suppose we have 100
  # training images and a batch size of 10: in the softmax example we simply
  # picked 10 images at random for each iteration, note, at random.
  zipped_data = zip(data_sets['images_train'], data_sets['labels_train'])

  # Shuffle the 100 images of the training set. The first 10 images of the
  # shuffled data form our first batch, the next 10 form the second batch,
  # and so on. After 10 batches, at the end of the dataset, the shuffle is
  # repeated.
  batches = data_helpers.gen_batch(list(zipped_data), FLAGS.batch_size,
    FLAGS.max_steps)

  for i in range(FLAGS.max_steps):

    # Get next input data batch
    batch = next(batches)
    images_batch, labels_batch = zip(*batch)
    feed_dict = {
      images_placeholder: images_batch,
      labels_placeholder: labels_batch
    }

    # Periodically print out the model's current accuracy
    if i % 100 == 0:
      train_accuracy = sess.run(accuracy, feed_dict=feed_dict)
      print('Step {:d}, training accuracy {:g}'.format(i, train_accuracy))
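
# For reference, a minimal sketch of what data_helpers.gen_batch presumably
# does, based on the comments above (an assumption, not the original helper):
# shuffle once per pass over the data, then yield consecutive batch_size slices.
import random

def gen_batch_sketch(data, batch_size, max_steps):
    data = list(data)
    index = len(data)  # forces a shuffle before the first batch
    for _ in range(max_steps):
        if index + batch_size > len(data):
            random.shuffle(data)  # reshuffle at the end of each pass
            index = 0
        yield data[index:index + batch_size]
        index += batch_size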
Example #3
# (the placeholder definitions were truncated in the source; presumably they
# mirror the other examples:)
images_placeholder = tf.placeholder(tf.float32, shape=[None, pixel_count],
                                    name='images')
labels_placeholder = tf.placeholder(tf.int64, shape=[None],
                                    name='image-labels')
logits = sigmoid_two_layer_inference(
    images_placeholder,
    pixel_count,
    FLAGS.hidden1,
    CLASSES,
    reg_constant=FLAGS.reg_constant)  # build model
loss = loss(logits, labels_placeholder)
train_step = training(loss, FLAGS.learning_rate)
accuracy = evaluation(logits, labels_placeholder)
report = report(logits, labels_placeholder)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    zipped_data = zip(data['images_train'], data['labels_train'])
    batches = gen_batch(list(zipped_data), FLAGS.batch_size, FLAGS.max_steps)

    for i in range(FLAGS.max_steps):
        batch = next(batches)
        images_batch, labels_batch = zip(*batch)
        feed_dict = {
            images_placeholder: images_batch,
            labels_placeholder: labels_batch
        }
        if i % 100 == 0:
            train_accuracy = sess.run(accuracy, feed_dict=feed_dict)
            print('Step {:d}, training accuracy {:g}'.format(
                i, train_accuracy))

        sess.run([train_step, loss], feed_dict=feed_dict)
Example #4
def main(_):

  # cluster specification
  parameter_servers = ["spaceml1:2222"]
  workers = ["spaceml1:2223", "spaceml1:2224", "spaceml1:2225", "spaceml1:2226"]
  num_workers = len(workers)

  cluster = tf.train.ClusterSpec({"ps":parameter_servers, "worker":workers})

  #local server, either ps or worker
  server = tf.train.Server(cluster, job_name=FLAGS.job_name, task_index=FLAGS.task_index)
  data_sets = data_helpers.load_data() 

  if FLAGS.job_name == "ps":
    server.join()
  elif FLAGS.job_name == "worker":
    with tf.device(tf.train.replica_device_setter(
            worker_device="/job:worker/task:%d" % FLAGS.task_index,
            cluster=cluster)):
        # Create the model
        x = tf.placeholder(tf.float32, shape=[None, 224, 224, 3])
        y_ = tf.placeholder(tf.int64, shape=[None])
        keep_prob = tf.placeholder(tf.float32)
        x_reshaped = tf.reshape(x, [-1, 224, 224, 3])

        # First convolutional layer: (224, 224, 3) -> (56, 56, 96)
        W_conv1 = weight_variable([11, 11, 3, 96])
        b_conv1 = bias_variable([96])
        h_conv1 = tf.nn.relu(conv2d(x_reshaped, W_conv1, [1, 4, 4, 1]) + b_conv1)

        # local response normalization, then max-pool: (56, 56, 96) -> (28, 28, 96)
        norm1 = tf.nn.lrn(h_conv1, 5, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
        max_pool1 = tf.nn.max_pool(norm1, ksize=[1, 3, 3, 1],
                                   strides=[1, 2, 2, 1], padding='SAME')

        # Second convolutional layer: (28, 28, 96) -> (28, 28, 256) -> (14, 14, 256)
        W_conv2 = weight_variable([5, 5, 96, 256])
        b_conv2 = bias_variable([256])
        h_conv2 = tf.nn.relu(conv2d(max_pool1, W_conv2, [1, 1, 1, 1]) + b_conv2)
        norm2 = tf.nn.lrn(h_conv2, 5, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
        h_pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1],
                                 strides=[1, 2, 2, 1], padding='SAME')

        # Third convolutional layer: (14, 14, 256) -> (14, 14, 384)
        W_conv3 = weight_variable([3, 3, 256, 384])
        b_conv3 = bias_variable([384])
        h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3, [1, 1, 1, 1]) + b_conv3)

        # Fourth convolutional layer: (14, 14, 384) -> (14, 14, 384)
        W_conv4 = weight_variable([3, 3, 384, 384])
        b_conv4 = bias_variable([384])
        h_conv4 = tf.nn.relu(conv2d(h_conv3, W_conv4, [1, 1, 1, 1]) + b_conv4)

        # Fifth convolutional layer: (14, 14, 384) -> (7, 7, 256) after pooling
        W_conv5 = weight_variable([3, 3, 384, 256])
        b_conv5 = bias_variable([256])
        h_conv5 = tf.nn.relu(conv2d(h_conv4, W_conv5, [1, 1, 1, 1]) + b_conv5)
        max_pooling5 = tf.nn.max_pool(h_conv5, ksize=[1, 3, 3, 1],
                                      strides=[1, 2, 2, 1], padding='SAME')

        # First fully-connected layer
        W_fc1 = relu_weight_variable([7 * 7 * 256, 4096])
        b_fc1 = bias_variable([4096])
        h_conv5_flat = tf.reshape(max_pooling5, [-1, 7 * 7 * 256])
        h_fc1 = tf.nn.relu(fc_batch_normalization(tf.matmul(h_conv5_flat, W_fc1) + b_fc1))
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

        # Second fully-connected layer
        W_fc2 = relu_weight_variable([4096, 4096])
        b_fc2 = bias_variable([4096])
        h_fc2 = tf.nn.relu(fc_batch_normalization(tf.matmul(h_fc1_drop, W_fc2) + b_fc2))
        h_fc2_drop = tf.nn.dropout(h_fc2, keep_prob)

        # Third fully-connected layer produces the class scores
        W_fc3 = relu_weight_variable([4096, num_classes])
        b_fc3 = bias_variable([num_classes])
        y_score = fc_batch_normalization(tf.matmul(h_fc2_drop, W_fc3) + b_fc3)
        y_prob = tf.nn.softmax(y_score)  # class probabilities, not logits

        cross_entropy = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y_score, labels=y_))

        train_step = tf.train.GradientDescentOptimizer(0.001).minimize(cross_entropy)

        correct_prediction = tf.equal(tf.argmax(y_prob, 1), y_)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


        saver = tf.train.Saver()     
        init_op = tf.global_variables_initializer()


        sv = tf.train.Supervisor(is_chief=(FLAGS.task_index == 0),
                                 logdir="/mnt/ds3lab/litian/logs",
                                 init_op=init_op,
                                 saver=saver)

        zipped_data = zip(data_sets['images_train'], data_sets['labels_train'])
        batches = data_helpers.gen_batch(list(zipped_data), 64, 50000)

        with sv.managed_session(server.target) as sess:
            begin = time.time()
            for i in range(50000):
                batch = next(batches)
                image_batch, label_batch = zip(*batch)
                image_batch = np.reshape(image_batch, [-1, 32, 32, 3])
                # NOTE: creating resize ops inside the loop grows the graph on
                # every iteration; defining the op once outside would be cheaper
                # (see the sketch after this example).
                image_batch = tf.image.resize_images(image_batch, [224, 224])
                image_batch = sess.run(image_batch)

                if i % 500 == 0 and (i // 500) % num_workers == FLAGS.task_index:
                    test_batch = data_sets['images_test']
                    test_batch = np.reshape(test_batch, [-1, 32, 32, 3])
                    test_batch = tf.image.resize_images(test_batch, [224, 224])
                    test_batch = sess.run(test_batch)
                    val_accuracy = []
                    # j, not i: reusing i would clobber the training-loop counter
                    for j in range(100):
                        val_accuracy.append(sess.run(accuracy, feed_dict={
                            x: test_batch[j * 100:(j + 1) * 100],
                            y_: data_sets['labels_test'][j * 100:(j + 1) * 100],
                            keep_prob: 1.0}))

                    avg_accuracy = np.mean(val_accuracy)
                    print("validation set accuracy %g" % avg_accuracy)

                sess.run(train_step, feed_dict={x: image_batch, y_: label_batch, keep_prob: 0.5})

                if i % 50 == 0:
                    train_accuracy = sess.run(accuracy, feed_dict={x: image_batch, y_: label_batch, keep_prob: 1.0})
                    train_loss = sess.run(cross_entropy, feed_dict={x: image_batch, y_: label_batch, keep_prob: 1.0})
                    localtime = time.asctime(time.localtime(time.time()))
                    print(localtime)
                    print("minutes elapsed: %g" % ((time.time() - begin) / 60.0))
                    print("step %d, training accuracy %g, training loss %g" % (i, train_accuracy, train_loss))

        sv.stop()
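
# The loop above rebuilds tf.image.resize_images ops on every iteration, which
# grows the graph over time. A sketch of the usual fix (hypothetical names):
# define the resize op once and run batches through it inside the loop.
raw_images = tf.placeholder(tf.float32, shape=[None, 32, 32, 3])
resized_images = tf.image.resize_images(raw_images, [224, 224])
# inside the training loop:
#     image_batch = sess.run(resized_images, feed_dict={raw_images: batch_32x32})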
Example #5
import numpy as np
import time
from input_data_cifar import create_train_datasets
from input_data_cifar import create_test_datasets
import data_helpers
import tensorflow as tf

FLAGS = None

NUM_IMAGES = 5000
num_classes = 10

data_sets = data_helpers.load_data()

zipped_data = zip(data_sets['images_train'], data_sets['labels_train'])
batches = data_helpers.gen_batch(list(zipped_data), 64, 50000)

# one ps and four workers


def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def relu_weight_variable(shape):
    assert len(shape) == 2  # use ==, not "is", for value comparison
    input_size = shape[0]
    # He initialization: stddev = sqrt(2 / fan_in), suited to ReLU units
    initial = tf.truncated_normal(shape, stddev=np.sqrt(2.0 / input_size))
    return tf.Variable(initial)
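
# Usage sketch: relu_weight_variable implements He initialization
# (stddev = sqrt(2 / fan_in)), which suits ReLU units. The shapes below are
# illustrative, matching the 3072 -> 240 -> 10 network used in other examples:
w_hidden = relu_weight_variable([3072, 240])
w_out = relu_weight_variable([240, 10])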
Example #6
            step, summaries, loss, accuracy, error, wrong_predictions = sess.run(
                [global_step, dev_summary_op, cnn.loss, cnn.accuracy, cnn.error,
                 cnn.wrong_predictions],
                feed_dict)
            # debugging aids, kept for reference:
            # np.set_printoptions(threshold=np.inf)
            # wrong_pred_data = y_batch[wrong_predictions, :]
            # _, wrong_cls = np.where(wrong_pred_data == 1)
            # print("wrong predictions: {}".format(Counter(wrong_cls)))
            # time_str = datetime.datetime.now().isoformat()
            # print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
            # if writer:
            #     writer.add_summary(summaries, step)
            return step, summaries, loss, accuracy, error

        for epoch in range(FLAGS.num_epochs):
            batches = data_helpers.gen_batch(
                list(zip(x_train, y_train)), FLAGS.batch_size)

            for batch in batches:
                x_batch, y_batch = zip(*batch)
                tr_step, tr_summaries, tr_loss, tr_accuracy, tr_error = train_step(
                    x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)

            if epoch % FLAGS.evaluate_every == 0:
                _, tr_summaries, tr_loss, tr_accuracy, tr_error = dev_step(
                    x_train, y_train, writer=None)
                _, te_summaries, te_loss, te_accuracy, te_error = dev_step(
                    x_dev, y_dev, writer=None)
                # write to summary
                train_summary_writer.add_summary(tr_summaries, epoch)
                dev_summary_writer.add_summary(te_summaries, epoch)
                train_summary_writer.flush()
                dev_summary_writer.flush()
                time_str = datetime.datetime.now().isoformat()
def cifar10_model(learning_rate, regularization, hparam, dropout_rate,
                  n_hidden_layer, n_hidden_unit, act_func):
    tf.reset_default_graph()
    sess = tf.InteractiveSession(config=tf.ConfigProto(
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.7)))

    # input layer
    x = tf.placeholder(tf.float32, shape=[None, IMAGE_PIXELS], name='images')
    # CIFAR-10 images are RGB, so the flat IMAGE_PIXELS vector reshapes to 32x32x3
    x_image = tf.reshape(x, [-1, 32, 32, 3])
    tf.summary.image('input', x_image, 3)

    # label to compare
    y_ = tf.placeholder(tf.int64, shape=[None], name='image-labels')
    keep_prob = tf.placeholder(tf.float32)
    y = tf.one_hot(y_, 10, 1.0, 0.0, -1)

    layers = []
    if regularization == "drop_out":
        for i in range(n_hidden_layer):
            if i == 0:
                layers.insert(
                    i,
                    tf.nn.dropout(
                        fc_layer(x, IMAGE_PIXELS, n_hidden_unit, act_func,
                                 "h" + str(i + 1)), keep_prob))
            else:
                layers.insert(
                    i,
                    tf.nn.dropout(
                        fc_layer(layers[i - 1], n_hidden_unit, n_hidden_unit,
                                 act_func, "h" + str(i + 1)), keep_prob))
        logit, W, B = logits(layers[n_hidden_layer - 1], n_hidden_unit, 10)

    elif regularization == 'batch_normalization':
        for i in range(n_hidden_layer):
            if i == 0:
                layers.insert(
                    i,
                    batch_layer(x, IMAGE_PIXELS, n_hidden_unit, act_func,
                                "h" + str(i + 1)))
            else:
                layers.insert(
                    i,
                    batch_layer(layers[i - 1], n_hidden_unit, n_hidden_unit,
                                act_func, "h" + str(i + 1)))
        logit = batch_logits(layers[n_hidden_layer - 1], n_hidden_unit, 10,
                             act_func)

    else:
        for i in range(n_hidden_layer):
            if i == 0:
                layers.insert(
                    i,
                    fc_layer(x, IMAGE_PIXELS, n_hidden_unit, act_func,
                             "h" + str(i + 1)))
            else:
                layers.insert(
                    i,
                    fc_layer(layers[i - 1], n_hidden_unit, n_hidden_unit,
                             act_func, "h" + str(i + 1)))
        logit, W, B = logits(layers[n_hidden_layer - 1], n_hidden_unit, 10)

    ## softmax layer - last layer for classification
    Y = tf.nn.softmax(logit)

    # loss function
    with tf.name_scope("xent"):
        xent = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
            logits=logit, labels=y),
                              name="xent")
        tf.summary.scalar("xent", xent)

    with tf.name_scope("train"):
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(xent)

    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar("accuracy", accuracy)

    summ = tf.summary.merge_all()

    sess.run(tf.global_variables_initializer())
    writer_train = tf.summary.FileWriter(LOGDIR + hparam + "_train")
    writer_train.add_graph(sess.graph)
    writer_test = tf.summary.FileWriter(LOGDIR + hparam + "_test")
    writer_test.add_graph(sess.graph)

    num_epochs = 200
    # training accuracy
    list_test_acc = list()

    # Generate input data batches
    zipped_data = zip(data_sets['images_train'], data_sets['labels_train'])
    # batch size 100; max steps = (batches per epoch) * batch size * num_epochs,
    # a generous upper bound on the 500 batches actually drawn per epoch
    batches = data_helpers.gen_batch(list(zipped_data), 100,
                                     500 * 100 * num_epochs)

    for k in range(num_epochs):
        print("epoch %d" % k)
        for i in range(500):
            batch = next(batches)
            batch_xs, batch_ys = zip(*batch)
            if i % 100 == 0:
                [train_accuracy, s_train] = sess.run([accuracy, summ],
                                                     feed_dict={
                                                         x: batch_xs,
                                                         y_: batch_ys,
                                                         keep_prob: 1
                                                     })
                writer_train.add_summary(s_train, k * 500 + i)
                [test_accuracy, s_test] = sess.run(
                    [accuracy, summ],
                    feed_dict={
                        x: data_sets['images_test'],
                        y_: data_sets['labels_test'],
                        keep_prob: 1
                    })
                writer_test.add_summary(s_test, k * 500 + i)
                print('Step {:d}, training accuracy {:g}'.format(
                    k * 500 + i, train_accuracy))
                print('Step {:d}, test accuracy {:g}'.format(
                    k * 500 + i, test_accuracy))
            # dropout_rate will only be used when dropout is enabled
            sess.run(train_step,
                     feed_dict={
                         x: batch_xs,
                         y_: batch_ys,
                         keep_prob: dropout_rate
                     })
        test_acc = accuracy.eval(
            feed_dict={
                x: data_sets['images_test'],
                y_: data_sets['labels_test'],
                keep_prob: 1
            })
        list_test_acc.append(test_acc)
        # use early stopping
        if k > 10 and np.mean(list_test_acc[-10:-5]) > np.mean(
                list_test_acc[-5:]):
            print("Seems like it starts to overfit, aborting the training")
            break
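
# The loop above stops early by comparing two five-epoch windows of test
# accuracy. The same heuristic as a standalone helper (a sketch, assuming
# numpy is imported as np):
def should_stop(acc_history, window=5, min_epochs=10):
    if len(acc_history) <= min_epochs:
        return False
    older = np.mean(acc_history[-2 * window:-window])
    recent = np.mean(acc_history[-window:])
    return older > recent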
def main(_):

    # cluster specification
    #  parameter_servers = ["sgs-gpu-02:2222", "sgs-gpu-02:2223", "sgs-gpu-03:2222", "sgs-gpu-03:2223"]
    #  workers = ["sgs-gpu-02:2224", "sgs-gpu-02:2225", "sgs-gpu-03:2224", "sgs-gpu-03:2225"]
    parameter_servers = [
        "spaceml1:2222", "spaceml1:2223", "spaceml1:2224", "spaceml1:2225"
    ]
    workers = [
        "spaceml1:2226", "spaceml1:2227", "spaceml1:2228", "spaceml1:2229"
    ]

    num_ps = len(parameter_servers)
    num_worker = num_ps

    cluster = tf.train.ClusterSpec({
        "ps": parameter_servers,
        "worker": workers
    })

    #local server, either ps or worker
    server = tf.train.Server(cluster,
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_index)

    data_sets = data_helpers.load_data()

    W1 = [0, 0, 0, 0]
    b1 = [0, 0, 0, 0]
    W2 = [0, 0, 0, 0]
    b2 = [0, 0, 0, 0]

    if FLAGS.job_name == "ps":
        server.join()
    elif FLAGS.job_name == "worker":
        with tf.device("/job:ps/task:0"):
            W1[0] = tf.get_variable(
                name='w10',
                shape=[3072, 240],
                initializer=tf.truncated_normal_initializer(
                    stddev=1.0 / np.sqrt(float(3072))),
                regularizer=tf.contrib.layers.l2_regularizer(0.1))
            #        W1[0] = tf.Variable(tf.random_normal([3072,240]))
            b1[0] = tf.Variable(tf.zeros([240]))
            W2[0] = tf.get_variable(
                name='w20',
                shape=[240, 10],
                initializer=tf.truncated_normal_initializer(
                    stddev=1.0 / np.sqrt(float(240))),  # fan-in of the 240->10 layer
                regularizer=tf.contrib.layers.l2_regularizer(0.1))
            #W2[0] = tf.Variable(tf.random_normal([240,10]))
            b2[0] = tf.Variable(tf.zeros([10]))
        with tf.device("/job:ps/task:1"):
            W1[1] = tf.get_variable(
                name='w11',
                shape=[3072, 240],
                initializer=tf.truncated_normal_initializer(
                    stddev=1.0 / np.sqrt(float(3072))),
                regularizer=tf.contrib.layers.l2_regularizer(0.1))
            #W1[1] = tf.Variable(tf.random_normal([3072,240]))
            b1[1] = tf.Variable(tf.zeros([240]))
            W2[1] = tf.get_variable(
                name='w21',
                shape=[240, 10],
                initializer=tf.truncated_normal_initializer(
                    stddev=1.0 / np.sqrt(float(240))),
                regularizer=tf.contrib.layers.l2_regularizer(0.1))
            # W2[1] = tf.Variable(tf.random_normal([240,10]))
            b2[1] = tf.Variable(tf.zeros([10]))
        with tf.device("/job:ps/task:2"):
            W1[2] = tf.get_variable(
                name='w12',
                shape=[3072, 240],
                initializer=tf.truncated_normal_initializer(
                    stddev=1.0 / np.sqrt(float(3072))),
                regularizer=tf.contrib.layers.l2_regularizer(0.1))

            #W1[2] = tf.Variable(tf.random_normal([3072,240]))
            b1[2] = tf.Variable(tf.zeros([240]))
            W2[2] = tf.get_variable(
                name='w22',
                shape=[240, 10],
                initializer=tf.truncated_normal_initializer(
                    stddev=1.0 / np.sqrt(float(240))),
                regularizer=tf.contrib.layers.l2_regularizer(0.1))

            #W2[2] = tf.Variable(tf.random_normal([240,10]))
            b2[2] = tf.Variable(tf.zeros([10]))
        with tf.device("/job:ps/task:3"):
            W1[3] = tf.get_variable(
                name='w13',
                shape=[3072, 240],
                initializer=tf.truncated_normal_initializer(
                    stddev=1.0 / np.sqrt(float(3072))),
                regularizer=tf.contrib.layers.l2_regularizer(0.1))
            # W1[3] = tf.Variable(tf.random_normal([3072,240]))
            b1[3] = tf.Variable(tf.zeros([240]))
            W2[3] = tf.get_variable(
                name='w23',
                shape=[240, 10],
                initializer=tf.truncated_normal_initializer(
                    stddev=1.0 / np.sqrt(float(240))),
                regularizer=tf.contrib.layers.l2_regularizer(0.1))
            #W2[3] = tf.Variable(tf.random_normal([240,10]))
            b2[3] = tf.Variable(tf.zeros([10]))
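
        # Note: the four per-task blocks above are identical except for the
        # variable names; a hypothetical loop form (same shapes and
        # initializers) would be:
        #     for i in range(num_ps):
        #         with tf.device("/job:ps/task:%d" % i):
        #             W1[i] = tf.get_variable(name='w1%d' % i, shape=[3072, 240], ...)
        #             b1[i] = tf.Variable(tf.zeros([240]))
        #             W2[i] = tf.get_variable(name='w2%d' % i, shape=[240, 10], ...)
        #             b2[i] = tf.Variable(tf.zeros([10]))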

        with tf.device(
                tf.train.replica_device_setter(
                    worker_device="/job:worker/task:%d" % FLAGS.task_index,
                    cluster=cluster)):

            # Create the model
            x = tf.placeholder(tf.float32, shape=[None, 3072])
            y_ = tf.placeholder(tf.int64, shape=[None])

            h1 = tf.nn.relu(
                tf.matmul(x, W1[FLAGS.task_index]) + b1[FLAGS.task_index])

            y = tf.matmul(h1, W2[FLAGS.task_index]) + b2[FLAGS.task_index]

            cross_entropy = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_,
                                                               logits=y))

            opt = tf.train.GradientDescentOptimizer(FLAGS.lr)

            grads_and_vars = opt.compute_gradients(cross_entropy, [
                W1[FLAGS.task_index], b1[FLAGS.task_index],
                W2[FLAGS.task_index], b2[FLAGS.task_index]
            ])

            # Bias each gradient with a ring-consensus term that pulls this
            # worker's variables toward its neighbors':
            #   g_i <- g_i - (W_{i-1} + W_{i+1} - 2*W_i) / (3*lr)
            new_gv0 = (grads_and_vars[0][0] -
                       (W1[(FLAGS.task_index - 1) % num_ps] + W1[
                           (FLAGS.task_index + 1) % num_ps] -
                        2 * W1[FLAGS.task_index]) / (3 * FLAGS.lr * 1.0),
                       grads_and_vars[0][1])
            new_gv1 = (grads_and_vars[1][0] -
                       (b1[(FLAGS.task_index - 1) % num_ps] + b1[
                           (FLAGS.task_index + 1) % num_ps] -
                        2 * b1[FLAGS.task_index]) / (3 * FLAGS.lr * 1.0),
                       grads_and_vars[1][1])
            new_gv2 = (grads_and_vars[2][0] -
                       (W2[(FLAGS.task_index - 1) % num_ps] + W2[
                           (FLAGS.task_index + 1) % num_ps] -
                        2 * W2[FLAGS.task_index]) / (3 * FLAGS.lr * 1.0),
                       grads_and_vars[2][1])
            new_gv3 = (grads_and_vars[3][0] -
                       (b2[(FLAGS.task_index - 1) % num_ps] + b2[
                           (FLAGS.task_index + 1) % num_ps] -
                        2 * b2[FLAGS.task_index]) / (3 * FLAGS.lr * 1.0),
                       grads_and_vars[3][1])

            new_gv = [new_gv0, new_gv1, new_gv2, new_gv3]

            train_step = opt.apply_gradients(new_gv)

            correct_prediction = tf.equal(tf.argmax(y, 1), y_)
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

            saver = tf.train.Saver()
            init_op = tf.global_variables_initializer()

            sv = tf.train.Supervisor(is_chief=(FLAGS.task_index == 0),
                                     logdir="/mnt/ds3lab/litian/logs",
                                     init_op=init_op,
                                     saver=saver)

            zipped_data = zip(data_sets['images_train'],
                              data_sets['labels_train'])
            batches = data_helpers.gen_batch(list(zipped_data), 128, 50000)

            with sv.managed_session(server.target) as sess:
                begin = time.time()
                for i in range(50000):
                    batch = next(batches)
                    image_batch, label_batch = zip(*batch)
                    sess.run(train_step,
                             feed_dict={
                                 x: image_batch,
                                 y_: label_batch
                             })

                    if i % 50 == 0:
                        train_accuracy = sess.run(accuracy,
                                                  feed_dict={
                                                      x: image_batch,
                                                      y_: label_batch
                                                  })
                        train_loss = sess.run(cross_entropy,
                                              feed_dict={
                                                  x: image_batch,
                                                  y_: label_batch
                                              })
                        localtime = time.asctime(time.localtime(time.time()))
                        print(localtime)
                        print("minutes elapsed: %g" % ((time.time() - begin) / 60.0))

                        print(
                            "step %d, training accuracy %g, training loss %g" %
                            (i, train_accuracy, train_loss))
            sv.stop()
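
# The new_gv* terms above implement a ring-consensus correction. A plain NumPy
# sketch of the same rule (an assumption mirroring the TF expressions above):
# each worker i biases its gradient so the SGD step also pulls W_i toward the
# average of its two ring neighbors.
def consensus_gradient(grad_i, W, i, lr):
    num_ps = len(W)
    neighbor_term = W[(i - 1) % num_ps] + W[(i + 1) % num_ps] - 2 * W[i]
    # W_i <- W_i - lr * (grad_i - neighbor_term / (3 * lr))
    #      = W_i - lr * grad_i + neighbor_term / 3
    return grad_i - neighbor_term / (3.0 * lr)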
Example #9
def main(_):

    #  mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
    #  list_ = []
    #  for line in open("/mnt/ds3lab/litian/input_data/cifar10/label3.txt"):
    #      list_.append(['a', line.strip('\n')])
    #  classes = np.array(list_)
    #  print (len(classes))

    #  train_dataset, mean, std = create_train_datasets(classes[:, 1], num_samples=NUM_IMAGES)
    #  val_dataset = create_test_datasets(classes[:, 1], mean, std, num_samples=NUM_IMAGES)

    #  val_images, val_labels = val_dataset.next_batch(20)

    #  num_classes = len(classes)
    #  print (num_classes)
    data_sets = data_helpers.load_data()

    #  with tf.device('/gpu:0'):

    # Create the model
    x = tf.placeholder(tf.float32, shape=[None, 3072])
    y_ = tf.placeholder(tf.int64, shape=[None])

    w1 = tf.get_variable(name='w1',
                         shape=[3072, 240],
                         initializer=tf.truncated_normal_initializer(
                             stddev=1.0 / np.sqrt(float(3072))),
                         regularizer=tf.contrib.layers.l2_regularizer(0.1))
    b1 = tf.Variable(tf.zeros([240]))
    h1 = tf.nn.relu(tf.matmul(x, w1) + b1)

    w2 = tf.get_variable(name='w2',
                         shape=[240, 10],
                         initializer=tf.truncated_normal_initializer(
                             stddev=1.0 / np.sqrt(float(240))),
                         regularizer=tf.contrib.layers.l2_regularizer(0.1))
    b2 = tf.Variable(tf.zeros([10]))
    y = tf.matmul(h1, w2) + b2

    cross_entropy = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_, logits=y))

    train_step = tf.train.GradientDescentOptimizer(0.0005).minimize(
        cross_entropy)

    correct_prediction = tf.equal(tf.argmax(y, 1), y_)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    sess = tf.Session()
    # tf.initialize_all_variables() is deprecated; use the modern initializer
    sess.run(tf.global_variables_initializer())

    zipped_data = zip(data_sets['images_train'], data_sets['labels_train'])
    batches = data_helpers.gen_batch(list(zipped_data), 128, 50000)
    for i in range(50000):
        #              batch_xs, batch_ys = mnist.train.next_batch(100)
        #   image_batch, label_batch = train_dataset.next_batch(60, random_crop=True)
        batch = next(batches)
        image_batch, label_batch = zip(*batch)
        sess.run(train_step, feed_dict={x: image_batch, y_: label_batch})

        if i % 50 == 0:
            train_accuracy = sess.run(accuracy,
                                      feed_dict={
                                          x: image_batch,
                                          y_: label_batch
                                      })
            train_loss = sess.run(cross_entropy,
                                  feed_dict={
                                      x: image_batch,
                                      y_: label_batch
                                  })
            localtime = time.asctime(time.localtime(time.time()))
            print(localtime)
            print("step %d, training accuracy %g, training loss %g" %
                  (i, train_accuracy, train_loss))
        if i % 500 == 0:
            val_accuracy = sess.run(accuracy,
                                    feed_dict={
                                        x: data_sets['images_test'],
                                        y_: data_sets['labels_test']
                                    })
            print("validation set accuracy %g" % val_accuracy)
def main(_):

    # cluster specification
    parameter_servers = ["spaceml1:2222"]
    workers = [
        "spaceml1:2223", "spaceml1:2224", "spaceml1:2225", "spaceml1:2226"
    ]
    num_workers = len(workers)

    cluster = tf.train.ClusterSpec({
        "ps": parameter_servers,
        "worker": workers
    })

    #local server, either ps or worker
    server = tf.train.Server(cluster,
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_index)
    data_sets = data_helpers.load_data()

    if FLAGS.job_name == "ps":
        server.join()
    elif FLAGS.job_name == "worker":
        with tf.device(
                tf.train.replica_device_setter(
                    worker_device="/job:worker/task:%d" % FLAGS.task_index,
                    cluster=cluster)):
            # Create the model
            x = tf.placeholder(tf.float32, shape=[None, 3072])
            y_ = tf.placeholder(tf.int64, shape=[None])

            w1 = tf.get_variable(
                name='w1',
                shape=[3072, 240],
                initializer=tf.truncated_normal_initializer(
                    stddev=1.0 / np.sqrt(float(3072))),
                regularizer=tf.contrib.layers.l2_regularizer(0.1))
            b1 = tf.Variable(tf.zeros([240]))

            h1 = tf.nn.relu(tf.matmul(x, w1) + b1)

            w2 = tf.get_variable(
                name='w2',
                shape=[240, 10],
                initializer=tf.truncated_normal_initializer(
                    stddev=1.0 / np.sqrt(float(240))),
                regularizer=tf.contrib.layers.l2_regularizer(0.1))
            b2 = tf.Variable(tf.zeros([10]))
            y = tf.matmul(h1, w2) + b2

            cross_entropy = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_,
                                                               logits=y))

            train_step = tf.train.GradientDescentOptimizer(0.0005).minimize(
                cross_entropy)

            correct_prediction = tf.equal(tf.argmax(y, 1), y_)
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


            saver = tf.train.Saver()
            init_op = tf.global_variables_initializer()

            sv = tf.train.Supervisor(is_chief=(FLAGS.task_index == 0),
                                     logdir="/mnt/ds3lab/litian/logs",
                                     init_op=init_op,
                                     saver=saver)

            zipped_data = zip(data_sets['images_train'],
                              data_sets['labels_train'])
            batches = data_helpers.gen_batch(list(zipped_data), 128, 40000)

            with sv.managed_session(server.target) as sess:
                begin = time.time()
                for i in range(40000):
                    batch = next(batches)
                    image_batch, label_batch = zip(*batch)
                    sess.run(train_step,
                             feed_dict={
                                 x: image_batch,
                                 y_: label_batch
                             })

                    if i % 50 == 0:
                        train_accuracy = sess.run(accuracy,
                                                  feed_dict={
                                                      x: image_batch,
                                                      y_: label_batch
                                                  })
                        train_loss = sess.run(cross_entropy,
                                              feed_dict={
                                                  x: image_batch,
                                                  y_: label_batch
                                              })
                        localtime = time.asctime(time.localtime(time.time()))
                        print(localtime)
                        print("minutes elapsed: %g" % ((time.time() - begin) / 60.0))
                        print(
                            "step %d, training accuracy %g, training loss %g" %
                            (i, train_accuracy, train_loss))

            sv.stop()
Example #11
def main(_):

  # cluster specification
  # in order to prevent ps from occupying GPUs, first start workers, then start parameter servers

  parameter_servers = ["sgs-gpu-02:2222"]
  workers = ["sgs-gpu-02:2223", "sgs-gpu-02:2224", "sgs-gpu-03:2222", "sgs-gpu-03:2223"]
  num_workers = len(workers)

  cluster = tf.train.ClusterSpec({"ps":parameter_servers, "worker":workers})

  #local server, either ps or worker
  server = tf.train.Server(cluster, job_name=FLAGS.job_name, task_index=FLAGS.task_index)
  data_sets = data_helpers.load_data() 

  if FLAGS.job_name == "ps":
    server.join()
  elif FLAGS.job_name == "worker":
    with tf.device(tf.train.replica_device_setter(worker_device="/job:worker/task:%d" % FLAGS.task_index, cluster=cluster)):
  # Create the model
        global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)

        x = tf.placeholder(tf.float32, shape = [None, 3072])
        y_ = tf.placeholder(tf.int64, shape=[None])

        w1 = tf.get_variable(name='w1',shape=[3072,240], initializer=tf.truncated_normal_initializer(stddev=1.0/np.sqrt(float(3072))),
          regularizer=tf.contrib.layers.l2_regularizer(0.1))
        b1 = tf.Variable(tf.zeros([240]))
        h1 = tf.nn.relu(tf.matmul(x, w1)+b1)

        w2 = tf.get_variable(name='w2', shape=[240,10], initializer=tf.truncated_normal_initializer(stddev=1.0/np.sqrt(float(240))),
          regularizer=tf.contrib.layers.l2_regularizer(0.1))
        b2 = tf.Variable(tf.zeros([10]))
        y = tf.matmul(h1, w2) + b2


        cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_, logits=y))

        opt = tf.train.GradientDescentOptimizer(0.0005)
        opt = tf.train.SyncReplicasOptimizer(opt, replicas_to_aggregate=num_workers, total_num_replicas=num_workers)

        train_step = opt.minimize(cross_entropy, global_step=global_step)

        correct_prediction = tf.equal(tf.argmax(y, 1), y_)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


        saver = tf.train.Saver()     
        init_op = tf.global_variables_initializer()

        init_token_op = opt.get_init_tokens_op()
        chief_queue_runner = opt.get_chief_queue_runner()

        sv = tf.train.Supervisor(is_chief=(FLAGS.task_index == 0),
                                 logdir="/mnt/ds3lab/litian/logs",
                                 init_op=init_op,
                                 saver=saver, global_step=global_step)

        zipped_data = zip(data_sets['images_train'], data_sets['labels_train'])
        batches = data_helpers.gen_batch(list(zipped_data), 128, 50000)

        # start a session
        sess = sv.prepare_or_wait_for_session(server.target)

        if FLAGS.task_index == 0:
          sv.start_queue_runners(sess, [chief_queue_runner])
          sess.run(init_token_op)

        for i in range(50000):
            batch = next(batches)
            image_batch, label_batch = zip(*batch)
            sess.run(train_step, feed_dict={x: image_batch, y_: label_batch})

            if i % 50 == 0:
                train_accuracy = sess.run(accuracy, feed_dict={x: image_batch, y_: label_batch})
                train_loss = sess.run(cross_entropy, feed_dict={x: image_batch, y_: label_batch})
                localtime = time.asctime(time.localtime(time.time()))
                print(localtime)
                print("step %d, training accuracy %g, training loss %g" % (i, train_accuracy, train_loss))
            if i % 500 == 0:
                val_accuracy = sess.run(accuracy, feed_dict={x: data_sets['images_test'], y_: data_sets['labels_test']})
                print("validation set accuracy %g" % val_accuracy)

        sv.stop()