Exemplo n.º 1
0
def train(mnist):
    """Build the MNIST training graph and run the training loop.

    The loss is the mean softmax cross-entropy plus the sum of every tensor in
    the "losses" collection (presumably the regularization terms registered by
    ``mnist_inference.inference`` -- confirm against that module). Each
    training step also refreshes the moving averages of all trainable
    variables.

    Every 1000 iterations the current batch loss is printed and a checkpoint
    is written to ``MODEL_SAVE_PATH``, suffixed with the global step.

    Args:
        mnist: dataset object; this function uses ``mnist.train.num_examples``
            and ``mnist.train.next_batch(batch_size)``.
    """
    x = tf.placeholder(tf.float32, [None, mnist_inference.INPUT_NODE], name="x-input")
    y_ = tf.placeholder(tf.float32, [None, mnist_inference.OUTPUT_NODE], name="y-input")
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    y = mnist_inference.inference(x, regularizer)
    # Incremented by the optimizer on every step; excluded from training.
    global_step = tf.Variable(0, trainable=False)

    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variable_averages_op = variable_averages.apply(tf.trainable_variables())
    # The sparse loss wants class indices, so reduce each label row with argmax.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean + tf.add_n(tf.get_collection("losses"))

    # Decay period: one pass over the training set, measured in batches.
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE,
                                               global_step,
                                               mnist.train.num_examples/BATCH_SIZE,
                                               LEARNING_RATE_DECAY)
    train_step = tf.train.GradientDescentOptimizer(learning_rate)\
        .minimize(loss, global_step=global_step)
    # Group the gradient step and the moving-average update into a single op.
    with tf.control_dependencies([train_step, variable_averages_op]):
        train_op = tf.no_op(name='train')
    saver = tf.train.Saver()
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        for i in range(TRAINING_STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            # Only the loss value is needed here; the step counter is read by
            # the saver directly from the `global_step` variable.
            _, loss_value = sess.run([train_op, loss], feed_dict={x: xs, y_: ys})
            if i % 1000 == 0:
                print("%d 训练后,损失值为 %g" % (i, loss_value))
                saver.save(sess, MODEL_SAVE_PATH, global_step=global_step)
Exemplo n.º 2
0
  def testSplitApplyMerge(self):
    """Trains the split/apply/merge toy model and checks that it converges.

    Runs 10 SGD steps (learning rate 1.0) on `final_loss` and asserts on the
    values fetched in the last step: route selection [0, 0, 1, 1] and
    approximately zero routing and final losses.
    """
    # Repeatability.  SGD has a tendency to jump around, even here.
    tf.set_random_seed(1)

    with self.test_session() as sess:
      # Use sampling to train REINFORCE
      with st.value_type(st.SampleAndReshapeValue(n=1)):
        (route_selection,
         routing_loss,
         final_loss) = build_split_apply_merge_model()

      sgd = tf.train.GradientDescentOptimizer(1.0).minimize(final_loss)

      tf.global_variables_initializer().run()

      # Build the fetch list once.  Calling tf.identity inside the loop would
      # add a new op to the graph on every iteration.
      fetches = [routing_loss, final_loss, tf.identity(route_selection), sgd]

      for i in range(10):
        # Run loss and inference step.  This toy problem converges VERY quickly.
        (routing_loss_v, final_loss_v, route_selection_v, _) = sess.run(fetches)
        print(
            "Iteration %d, routing loss: %s, final_loss: %s, "
            "route selection: %s"
            % (i, routing_loss_v, final_loss_v, route_selection_v))

      self.assertAllEqual([0, 0, 1, 1], route_selection_v)
      self.assertAllClose([0.0, 0.0, 0.0, 0.0], routing_loss_v)
      self.assertAllClose(0.0, final_loss_v)
Exemplo n.º 3
0
  def testConv2DReflect(self):
    """Compares Conv2D with 'REFLECT' padding against the NumPy reference."""
    np.random.seed(768798)

    input_shape = [4, 10, 12, 6]
    kernel_shape = [3, 4, 6, 5]
    full_strides = [1, 2, 2, 1]
    pad_mode = 'REFLECT'

    # The layer takes only the two middle entries of the kernel shape and of
    # the strides; the output depth is the last kernel dimension.
    layer = blocks_std.Conv2D(
        depth=kernel_shape[-1],
        filter_size=kernel_shape[0:2],
        strides=full_strides[1:3],
        padding=pad_mode,
        act=None,
        bias=None)
    input_value = np.random.normal(size=input_shape)
    output = layer(tf.convert_to_tensor(input_value, dtype=tf.float32))

    with self.test_session():
      tf.global_variables_initializer().run()
      kernel_value = layer._kernel.eval()
      output_value = output.eval()

    # The reference implementation receives the full 4-element strides.
    expected = _NumpyConv2D(
        input_value, kernel_value, strides=full_strides, padding=pad_mode)
    self.assertAllClose(expected, output_value)
Exemplo n.º 4
0
  def testAdagradDAWithL1(self):
    """Checks one AdagradDA step with L1 regularization for float64/float32."""
    for dtype in (tf.float64, tf.float32):
      with self.test_session() as sess:
        step_counter = tf.Variable(0, dtype=tf.int64)
        var0 = tf.Variable([1.0, 2.0], dtype=dtype)
        var1 = tf.Variable([4.0, 3.0], dtype=dtype)
        grads0 = tf.constant([0.1, 0.2], dtype=dtype)
        grads1 = tf.constant([0.01, 0.02], dtype=dtype)

        optimizer = tf.train.AdagradDAOptimizer(
            3.0,
            step_counter,
            initial_gradient_squared_accumulator_value=0.1,
            l1_regularization_strength=0.001,
            l2_regularization_strength=0.0)
        grads_and_vars = zip([grads0, grads1], [var0, var1])
        update = optimizer.apply_gradients(
            grads_and_vars, global_step=step_counter)
        tf.global_variables_initializer().run()

        # Before any update the variables hold their initial values.
        initial0, initial1 = sess.run([var0, var1])
        self.assertAllCloseAccordingToType([1.0, 2.0], initial0)
        self.assertAllCloseAccordingToType([4.0, 3.0], initial1)

        # Run a step of AdagradDA
        update.run()

        updated0, updated1 = sess.run([var0, var1])
        expected0 = np.array([-0.895489, -1.59555])
        expected1 = np.array([-0.085339, -0.17989])
        self.assertAllCloseAccordingToType(expected0, updated0)
        self.assertAllCloseAccordingToType(expected1, updated1)
Exemplo n.º 5
0
    def init_training_graph(self):
        """Build the logits, loss and prediction tensors, then initialize variables.

        Reads ``self.last``, ``self.logits_weight``, ``self.logits_biases`` and
        ``self.train_labels_node``; sets ``self.logits2``, ``self.logits``,
        ``self.loss``, ``self.predictions``, ``self.train_prediction`` and
        ``self.test_prediction``.
        """

        with tf.name_scope('Evaluation'):
            # self.logits = self.conv_layer_f(self.last, self.logits_weight, strides=[1,1,1,1], scope_name="logits/")
            with tf.name_scope("logits/"):
                # Stride-1 VALID convolution followed by a bias add.
                self.logits2 = tf.nn.conv2d(self.last, self.logits_weight, strides=[1,1,1,1], padding="VALID")
                self.logits = tf.nn.bias_add(self.logits2, self.logits_biases)
            # Raw logits are used as predictions; the softmax variants below are disabled.
            self.predictions = self.logits
            #self.predictions = tf.squeeze(self.logits, [3])
            #softmax = tf.nn.softmax(self.logits)
            #print softmax.get_shape()
            #self.predictions = tf.slice(softmax, [0, 0, 0, 0], [-1, -1, -1, 1])
            with tf.name_scope('Loss'):

                # NOTE(review): tf.losses.mean_squared_error is documented as
                # (labels, predictions); the arguments here are in the opposite
                # order. With no weights the squared error is the same -- confirm intent.
                self.loss = tf.reduce_mean(tf.losses.mean_squared_error(self.logits, self.train_labels_node))
                #self.loss = tf.reduce_mean(tf.losses.mean_squared_error(self.predictions, self.train_labels_node))
                tf.summary.scalar("mean_squared_error", self.loss)
            # Drop axis 3 after the loss has been built on the unsqueezed logits.
            self.predictions = tf.squeeze(self.predictions, [3])
            # Train and test predictions are the same tensor.
            self.train_prediction = self.predictions

            self.test_prediction = self.predictions

        # NOTE(review): no session is passed, so this relies on a default
        # session being active when the method is called -- confirm with the caller.
        tf.global_variables_initializer().run()

        print('Computational graph initialised')
Exemplo n.º 6
0
 def testBasic(self):
   """Checks one SGLD update against values computed by hand with NumPy."""
   for dtype in (tf.half, tf.float32, tf.float64):
     with self.test_session():
       var0 = tf.Variable([1.1, 2.1], dtype=dtype)
       var1 = tf.Variable([3., 4.], dtype=dtype)
       grads0 = tf.constant([0.1, 0.1], dtype=dtype)
       grads1 = tf.constant([0.01, 0.01], dtype=dtype)
       decay_rate = 0.53
       optimizer = tfp.optimizer.StochasticGradientLangevinDynamics(
           3., preconditioner_decay_rate=decay_rate)
       grads_and_vars = zip([grads0, grads1], [var0, var1])
       update_op = optimizer.apply_gradients(grads_and_vars)
       tf.global_variables_initializer().run()

       # The variables must still hold their initial values.
       self.assertAllCloseAccordingToType([1.1, 2.1], var0.eval())
       self.assertAllCloseAccordingToType([3., 4.], var1.eval())

       # Apply a single optimizer step.
       update_op.run()

       # Expected preconditioned gradients for each variable.
       # Note that `tfp.math.diag_jacobian(xs=var, ys=grad)` returns zero
       # tensor
       scaled0 = (0.5 * 0.1 /
                  np.sqrt(decay_rate + (1. - decay_rate) * 0.1**2 + 1e-8))
       scaled1 = (0.5 * 0.01 / np.sqrt(
           decay_rate + (1. - decay_rate) * 0.01**2 + 1e-8))

       expected0 = [1.1 - 3. * scaled0, 2.1 - 3. * scaled0]
       expected1 = [3. - 3. * scaled1, 4. - 3. * scaled1]
       self.assertAllCloseAccordingToType(expected0, var0.eval())
       self.assertAllCloseAccordingToType(expected1, var1.eval())
       # Exactly one step has been counted.
       self.assertAllCloseAccordingToType(1, optimizer._counter.eval())
Exemplo n.º 7
0
 def testYesShuffle(self):
   """Reads 500 shuffled records; each index equals its value and none repeats."""
   source = rs.ReaderSource(reader_cls=tf.IdentityReader,
                            work_units=self.work_units,
                            batch_size=1,
                            shuffle=True,
                            num_threads=10,
                            seed=1234)
   index_column, value_column = source()
   # Both columns are built against the same cache dict.
   build_cache = {}
   index_tensor = index_column.build(build_cache)
   value_tensor = value_column.build(build_cache)
   self.assertEqual([1], index_tensor.get_shape().as_list())
   self.assertEqual([1], value_tensor.get_shape().as_list())

   already_seen = set()
   with self.test_session() as sess:
     tf.global_variables_initializer().run()
     coordinator = tf.train.Coordinator()
     runner_threads = tf.train.start_queue_runners(
         sess=sess, coord=coordinator)
     for _ in range(500):
       index, value = sess.run([index_tensor, value_tensor])
       self.assertEqual(index, value)
       record_id = int(value[0])
       self.assertNotIn(record_id, already_seen)
       already_seen.add(record_id)
     coordinator.request_stop()
     coordinator.join(runner_threads)
Exemplo n.º 8
0
  def testDepthOrderKernel(self):
    """Feeds all-ones input and kernel through a strict DepthOrderConv2D.

    The expected output at every spatial position is [0, 1, ..., depth - 1].
    """
    kernel_size = 1
    input_depth = 7
    output_depth = input_depth
    kernel_shape = [kernel_size, kernel_size, input_depth, output_depth]

    kernel_feed = np.ones(kernel_shape)
    x_shape = [5, 5, 5, input_depth]
    x_feed = np.ones(x_shape)
    # Broadcast arange(output_depth) over the three leading axes.
    y_expected = np.zeros(x_shape[0:3] + [output_depth])
    y_expected[:, :, :] = np.arange(output_depth)

    def init_kernel(s, t):
      # Called by the layer with (shape, dtype); always returns the all-ones kernel.
      return tf.constant(kernel_feed, dtype=t, shape=s)

    layer = blocks_masked_conv2d.DepthOrderConv2D(
        output_depth, [kernel_size, kernel_size], [1, 1], 'SAME',
        strict_order=True,
        initializer=init_kernel)
    x = tf.placeholder(dtype=tf.float32, shape=x_shape)
    y = layer(x)

    with self.test_session():
      tf.global_variables_initializer().run()
      y_value = y.eval(feed_dict={x: x_feed})

    self.assertAllEqual(y_expected, y_value)
Exemplo n.º 9
0
def basic_operation():
    """Demonstrate adding tf.Variable and tf.constant values in a session.

    Prints the symbolic sum tensors and the Python types involved, then
    evaluates both sums inside a session and prints the results.
    """
    v1 = tf.Variable(10)
    v2 = tf.Variable(5)
    addv = v1 + v2
    print(addv)
    print(type(addv))
    print(type(v1))

    c1 = tf.constant(10)
    c2 = tf.constant(5)
    addc = c1 + c2
    print(addc)
    print(type(addc))
    print(type(c1))

    # The session is the runtime object that executes the computation graph.
    # Using it as a context manager closes it even if a step below fails.
    with tf.Session() as sess:
        # Variable -> initialize -> tensor that holds a value
        tf.global_variables_initializer().run(session=sess)

        print('变量是需要初始化的')
        print('加法(v1, v2) = ', addv.eval(session=sess))
        print('加法(v1, v2) = ', sess.run(addv))
        print('加法(c1, c2) = ', addc.eval(session=sess))
Exemplo n.º 10
0
  def testSameVariablesClear(self):
    """Checks that resetting a target aborts its sessions and clears variables."""
    local_server = tf.train.Server.create_local_server()

    # Graph: a 1x2 variable times a 2x1 variable.
    row = tf.Variable([[2, 1]], name="v0")
    column = tf.Variable([[1], [2]], name="v1")
    product = tf.matmul(row, column)
    expected = [[4]]

    # Two sessions on the same target see the same variable state, so
    # initializing through the first is enough for both.
    first_session = tf.Session(local_server.target)
    second_session = tf.Session(local_server.target)
    first_session.run(tf.global_variables_initializer())
    self.assertAllEqual(expected, first_session.run(product))
    self.assertAllEqual(expected, second_session.run(product))

    # After the reset, running in an existing session must abort.
    tf.Session.reset(local_server.target)
    with self.assertRaises(tf.errors.AbortedError):
      self.assertAllEqual(expected, second_session.run(product))

    # A new session on the same target finds the variables uninitialized,
    # because their device memory has been released.
    second_session = tf.Session(local_server.target)
    with self.assertRaises(tf.errors.FailedPreconditionError):
      second_session.run(product)
    # Once re-initialized the product is available again.
    second_session.run(tf.global_variables_initializer())
    self.assertAllEqual(expected, second_session.run(product))
    second_session.close()
Exemplo n.º 11
0
  def testRasterScanKernel(self):
    """Checks which entries of a 5x5 kernel RasterScanConv2D keeps.

    The kernel is initialized with 1..25; the expected kernel keeps the first
    two rows and the first two entries of the third row, and is zero elsewhere.
    """
    kernel_size = 5
    input_depth = 1
    output_depth = 1
    kernel_shape = [kernel_size, kernel_size, input_depth, output_depth]

    # pylint: disable=bad-whitespace
    initial_rows = [[ 1.0,  2.0,  3.0,  4.0,  5.0],
                    [ 6.0,  7.0,  8.0,  9.0, 10.0],
                    [11.0, 12.0, 13.0, 14.0, 15.0],
                    [16.0, 17.0, 18.0, 19.0, 20.0],
                    [21.0, 22.0, 23.0, 24.0, 25.0]]
    expected_rows = [[ 1.0,  2.0, 3.0, 4.0,  5.0],
                     [ 6.0,  7.0, 8.0, 9.0, 10.0],
                     [11.0, 12.0, 0.0, 0.0,  0.0],
                     [ 0.0,  0.0, 0.0, 0.0,  0.0],
                     [ 0.0,  0.0, 0.0, 0.0,  0.0]]
    # pylint: enable=bad-whitespace
    kernel_feed = np.reshape(initial_rows, kernel_shape)
    kernel_expected = np.reshape(expected_rows, kernel_shape)

    def init_kernel(s, t):
      # Called by the layer with (shape, dtype); returns the 1..25 kernel.
      return tf.constant(kernel_feed, dtype=t, shape=s)

    layer = blocks_masked_conv2d.RasterScanConv2D(
        output_depth, [kernel_size, kernel_size], [1, 1], 'SAME',
        initializer=init_kernel)
    x = tf.placeholder(dtype=tf.float32, shape=[10, 10, 10, input_depth])
    # The layer is applied only so that its kernel gets built.
    _ = layer(x)

    with self.test_session():
      tf.global_variables_initializer().run()
      kernel_value = layer._kernel.eval()

    self.assertAllEqual(kernel_expected, kernel_value)
  def benchmark_create_1000_partitions_with_100_parameter_servers(self):
    """Benchmarks reading variables split into 1000 partitions over 100 PS tasks.

    One partitioned variable is built per partition size, and the tensor that
    concatenates its shards is benchmarked on the first worker session.
    """
    workers, _ = create_local_cluster(num_workers=1, num_ps=100)
    sessions = [tf.Session(w.target) for w in workers]
    worker = sessions[0]
    partition_sizes = (1, 512, 1024*32, 1024*128)

    concatenated = []
    for floats_per_partition in partition_sizes:
      # The variable holds 1000 * floats_per_partition float32 values and each
      # shard is capped at 4 * floats_per_partition bytes, so it should split
      # into 1000 shards of floats_per_partition floats each.
      print("Building partitioned variable with %d floats per partition"
            % floats_per_partition)
      with tf.device(tf.train.replica_device_setter(ps_tasks=100)):
        partitioner = tf.variable_axis_size_partitioner(
            max_shard_bytes=4 * floats_per_partition)
        variable = tf.get_variable(
            "partitioned_%d" % floats_per_partition,
            shape=[1000 * floats_per_partition],
            dtype=tf.float32,
            partitioner=partitioner)
        # Converting to a tensor concatenates the shards along axis 0.
        concatenated.append(tf.convert_to_tensor(variable))

    tf.global_variables_initializer().run(session=worker)

    for floats_per_partition, tensor in zip(partition_sizes, concatenated):
      print("Running benchmark having partitions with %d floats"
            % floats_per_partition)
      benchmark_name = ("read_concat_1000_partitions_from_"
                        "100_parameter_servers_partsize_%d_floats"
                        % floats_per_partition)
      self.run_op_benchmark(worker, tensor, name=benchmark_name)
Exemplo n.º 13
0
def train(data_dir, checkpoint_path, config):
    """Train a Namignizer model and save a checkpoint after every epoch.

    Args:
        data_dir: path to the name data (see data_utils for the data format)
        checkpoint_path: where the model checkpoints are written
        config: configuration object describing the model and how it is
            trained; this function reads init_scale, max_max_epoch,
            max_epoch, lr_decay, learning_rate and epoch_size from it
    Returns:
        None
    """
    # Load the name data.
    print("Reading Name data in %s" % data_dir)
    names, counts = data_utils.read_names(data_dir)

    with tf.Graph().as_default(), tf.Session() as session:
        uniform_init = tf.random_uniform_initializer(-config.init_scale,
                                                     config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=uniform_init):
            model = NamignizerModel(is_training=True, config=config)

        tf.global_variables_initializer().run()

        for epoch in range(config.max_max_epoch):
            # The rate stays at its base value through epoch `max_epoch`,
            # then decays geometrically with each further epoch.
            decay_exponent = max(epoch - config.max_epoch, 0.0)
            decay_factor = config.lr_decay ** decay_exponent
            model.assign_lr(session, config.learning_rate * decay_factor)

            print("Epoch: %d Learning rate: %.3f" % (epoch + 1, session.run(model.lr)))
            perplexity = run_epoch(session, model, names, counts,
                                   config.epoch_size, model.train_op,
                                   verbose=True)
            print("Epoch: %d Train Perplexity: %.3f" %
                  (epoch + 1, perplexity))

            model.saver.save(session, checkpoint_path, global_step=epoch)
Exemplo n.º 14
0
    def testOneThread(self):
        """Checks single-threaded batching through a prefetch queue.

        A counter limited to ``num_batches * batch_size`` drives the input, so
        exactly ``num_batches`` batches can be dequeued, in order, before the
        pipeline raises OutOfRangeError.
        """
        with self.test_session() as sess:
            batch_size = 10
            image_size = 32
            num_batches = 5

            zero64 = tf.constant(0, dtype=tf.int64)

            examples = tf.Variable(zero64)
            counter = examples.count_up_to(num_batches * batch_size)
            image = tf.random_normal([image_size, image_size, 3], dtype=tf.float32, name="images")
            label = tf.random_uniform([1], 0, 10, dtype=tf.int32, name="labels")

            batches = tf.train.batch([counter, image, label], batch_size=batch_size, num_threads=1)

            batches = slim.prefetch_queue.prefetch_queue(batches).dequeue()

            tf.global_variables_initializer().run()
            threads = tf.train.start_queue_runners()

            for i in range(num_batches):
                results = sess.run(batches)
                # With one thread the counter values arrive in order.
                self.assertAllEqual(results[0], np.arange(i * batch_size, (i + 1) * batch_size))
                # assertEqual replaces the deprecated assertEquals alias,
                # which was removed in Python 3.12.
                self.assertEqual(results[1].shape, (batch_size, image_size, image_size, 3))
                self.assertEqual(results[2].shape, (batch_size, 1))

            # Reached the limit.
            with self.assertRaises(tf.errors.OutOfRangeError):
                sess.run(batches)
            for thread in threads:
                thread.join()
Exemplo n.º 15
0
    def testMultipleDequeue(self):
        """Checks that two dequeue ops on one prefetch queue share its batches.

        Four threads fill the batches, so the order of counter values is not
        fixed; the test only requires that, taken together, the dequeued
        values are exactly 0 .. num_batches * batch_size - 1.
        """
        with self.test_session() as sess:
            batch_size = 10
            image_size = 32
            num_batches = 4

            zero64 = tf.constant(0, dtype=tf.int64)

            examples = tf.Variable(zero64)
            counter = examples.count_up_to(num_batches * batch_size)
            image = tf.random_normal([image_size, image_size, 3], dtype=tf.float32, name="images")
            label = tf.random_uniform([1], 0, 10, dtype=tf.int32, name="labels")

            batches = tf.train.batch([counter, image, label], batch_size=batch_size, num_threads=4)

            batcher = slim.prefetch_queue.prefetch_queue(batches)
            batches_list = [batcher.dequeue() for _ in range(2)]

            tf.global_variables_initializer().run()
            threads = tf.train.start_queue_runners()

            value_counter = []
            # Each pass runs both dequeue ops, so half as many passes are needed.
            for _ in range(num_batches // 2):
                for dequeued in batches_list:
                    results = sess.run(dequeued)
                    value_counter.append(results[0])
                    # assertEqual replaces the deprecated assertEquals alias,
                    # which was removed in Python 3.12.
                    self.assertEqual(results[1].shape, (batch_size, image_size, image_size, 3))
                    self.assertEqual(results[2].shape, (batch_size, 1))

            self.assertAllEqual(np.sort(np.concatenate(value_counter)), np.arange(0, num_batches * batch_size))
            # Reached the limit. The last dequeue op is named explicitly here
            # instead of relying on a loop variable that outlives its loop.
            with self.assertRaises(tf.errors.OutOfRangeError):
                sess.run(batches_list[-1])
            for thread in threads:
                thread.join()
Exemplo n.º 16
0
def train(model, data, gen, params):
    """Alternate discriminator and generator updates, then visualize.

    Runs ``params.num_steps + 1`` steps. Every ``params.log_every`` steps both
    losses are printed. If ``params.anim_path`` is set, sample frames are
    collected every ``params.anim_every`` steps and saved as an animation at
    the end; otherwise the final samples are plotted.

    Args:
        model: object exposing loss_d, opt_d, loss_g, opt_g and the
            placeholders x and z.
        data: source of real samples; ``data.sample(batch_size)`` is used.
        gen: source of generator input; ``gen.sample(batch_size)`` and
            ``gen.range`` are used.
        params: run settings (num_steps, batch_size, log_every, anim_path,
            anim_every).
    """
    frames = []

    with tf.Session() as session:
        tf.local_variables_initializer().run()
        tf.global_variables_initializer().run()

        for step in range(params.num_steps + 1):
            # Discriminator step: one batch of real data and one of generator input.
            real_batch = data.sample(params.batch_size)
            noise_batch = gen.sample(params.batch_size)
            discriminator_feed = {
                model.x: np.reshape(real_batch, (params.batch_size, 1)),
                model.z: np.reshape(noise_batch, (params.batch_size, 1))
            }
            loss_d, _ = session.run([model.loss_d, model.opt_d], discriminator_feed)

            # Generator step: a fresh batch of generator input.
            noise_batch = gen.sample(params.batch_size)
            generator_feed = {
                model.z: np.reshape(noise_batch, (params.batch_size, 1))
            }
            loss_g, _ = session.run([model.loss_g, model.opt_g], generator_feed)

            if step % params.log_every == 0:
                print('{}: {:.4f}\t{:.4f}'.format(step, loss_d, loss_g))

            if params.anim_path and (step % params.anim_every == 0):
                frame = samples(model, session, data, gen.range, params.batch_size)
                frames.append(frame)

        if params.anim_path:
            save_animation(frames, params.anim_path, gen.range)
        else:
            final_samples = samples(model, session, data, gen.range, params.batch_size)
            plot_distributions(final_samples, gen.range)
Exemplo n.º 17
0
 def testMultiLabelWithCenteredBias(self):
   """Checks variables, summaries and metrics of a centered-bias multi-label head."""
   n_classes = 3
   head = head_lib._multi_label_head(
       n_classes=n_classes, enable_centered_bias=True,
       metric_class_ids=range(n_classes))
   with tf.Graph().as_default(), tf.Session():
     logits = tf.constant([[1., 0., 0.]])
     labels = tf.constant([[0, 0, 1]])
     model_fn_ops = head.head_ops(
         {}, labels, tf.contrib.learn.ModeKeys.TRAIN, _noop_train_op,
         logits=logits)

     # The bias weight is the only trainable variable; its Adagrad slot is
     # the only other global variable.
     global_names = (
         "centered_bias_weight:0",
         "centered_bias_weight/Adagrad:0",
     )
     trainable_names = (
         "centered_bias_weight:0",
     )
     _assert_variables(
         self, expected_global=global_names,
         expected_trainable=trainable_names)

     tf.global_variables_initializer().run()

     # One summary for the loss plus one per class bias.
     summary_tags = ["loss",
                     "centered_bias/bias_0",
                     "centered_bias/bias_1",
                     "centered_bias/bias_2"]
     _assert_summary_tags(self, summary_tags)

     expected_loss = .89985204
     expected_metrics = self._expected_eval_metrics(expected_loss)
     _assert_metrics(self, expected_loss, expected_metrics, model_fn_ops)
Exemplo n.º 18
0
  def testDifferingKeyHeadSizes(self, gate_style):
    """Runs one memory step with a key size that differs from the head size.

    Passes if the module can be built and applied, and the memory after the
    step is not identical to the initial memory.
    """
    mem_slots = 2
    head_size = 32
    num_heads = 2
    key_size = 128
    batch_size = 5
    input_shape = (batch_size, 3, 3)

    memory_module = relational_memory.RelationalMemory(
        mem_slots, head_size, num_heads,
        gate_style=gate_style,
        key_size=key_size)
    self.assertNotEqual(key_size, memory_module._head_size)

    inputs = tf.placeholder(tf.float32, input_shape)
    initial_memory = memory_module.initial_state(batch_size)
    _, next_memory = memory_module(inputs, initial_memory)

    fetches = {"memory_1": next_memory, "memory_0": initial_memory}
    with self.test_session() as session:
      tf.global_variables_initializer().run()
      results = session.run(
          fetches, feed_dict={inputs: np.ones(input_shape)})

    memory_changed = np.not_equal(results["memory_0"], results["memory_1"])
    self.assertTrue(np.any(memory_changed))
Exemplo n.º 19
0
    def pretrain(self):
        """Pretrain ``self.model`` on SVHN.

        Runs ``self.pretrain_iter + 1`` training steps, cycling through the
        training set in whole batches of ``self.batch_size``. Every 10 steps
        it writes a summary and prints the train loss/accuracy together with
        the accuracy on a random test batch; every 1000 steps it saves a
        checkpoint named 'svhn_model' under ``self.model_save_path``.
        """
        # load svhn dataset
        train_images, train_labels = self.load_svhn(self.svhn_dir, split='train')
        test_images, test_labels = self.load_svhn(self.svhn_dir, split='test')

        # build a graph
        model = self.model
        model.build_model()

        with tf.Session(config=self.config) as sess:
            tf.global_variables_initializer().run()
            saver = tf.train.Saver()
            summary_writer = tf.summary.FileWriter(logdir=self.log_dir, graph=tf.get_default_graph())
            # Number of whole batches in the training set; it does not change
            # between steps, so compute it once.
            num_batches = int(train_images.shape[0] / self.batch_size)

            try:
                for step in range(self.pretrain_iter+1):
                    # Wrap around so the data is reused once exhausted.
                    i = step % num_batches
                    batch_images = train_images[i*self.batch_size:(i+1)*self.batch_size]
                    batch_labels = train_labels[i*self.batch_size:(i+1)*self.batch_size]
                    feed_dict = {model.images: batch_images, model.labels: batch_labels}
                    sess.run(model.train_op, feed_dict)

                    if (step+1) % 10 == 0:
                        summary, l, acc = sess.run([model.summary_op, model.loss, model.accuracy], feed_dict)
                        # Evaluate on a random test batch of the same size.
                        rand_idxs = np.random.permutation(test_images.shape[0])[:self.batch_size]
                        test_acc, _ = sess.run(fetches=[model.accuracy, model.loss],
                                               feed_dict={model.images: test_images[rand_idxs],
                                                          model.labels: test_labels[rand_idxs]})
                        summary_writer.add_summary(summary, step)
                        print ('Step: [%d/%d] loss: [%.6f] train acc: [%.2f] test acc [%.2f]' \
                                   %(step+1, self.pretrain_iter, l, acc, test_acc))

                    if (step+1) % 1000 == 0:
                        saver.save(sess, os.path.join(self.model_save_path, 'svhn_model'), global_step=step+1)
                        print ('svhn_model-%d saved..!' %(step+1))
            finally:
                # Flush pending summaries and release the event file even if
                # training is interrupted.
                summary_writer.close()
Exemplo n.º 20
0
  def testVariableCopy(self):
    """Copies a variable from graph1 into graph2 twice and compares values.

    One copy goes into graph2's default scope and one into "test_scope";
    both must be tf.Variable instances holding the original value 2.
    """

    with graph1.as_default():
      #Define a Variable in graph1
      some_var = tf.Variable(2)
      #Initialize session
      sess1 = tf.Session()
      # Close the session when the test finishes, whether it passes or fails.
      self.addCleanup(sess1.close)
      #Initialize the Variable
      tf.global_variables_initializer().run(session=sess1)

    #Make a copy of some_var in the default scope in graph2
    copy1 = tf.contrib.copy_graph.copy_variable_to_graph(
        some_var, graph2)

    #Make another copy with different scope
    copy2 = tf.contrib.copy_graph.copy_variable_to_graph(
        some_var, graph2, "test_scope")

    #Initialize both the copies
    with graph2.as_default():
      #Initialize Session
      sess2 = tf.Session()
      self.addCleanup(sess2.close)
      #Initialize the Variables
      tf.global_variables_initializer().run(session=sess2)

    #Ensure values in all three variables are the same
    v1 = some_var.eval(session=sess1)
    v2 = copy1.eval(session=sess2)
    v3 = copy2.eval(session=sess2)

    # unittest assertions are used instead of bare `assert`, which is
    # stripped when Python runs with -O.
    self.assertIsInstance(copy1, tf.Variable)
    self.assertIsInstance(copy2, tf.Variable)
    self.assertTrue(v1 == v2 == v3 == 2)
Exemplo n.º 21
0
    def __init__(self, input_dim=None, output_dim=1, init_path=None, opt_algo='gd', learning_rate=1e-2, l2_weight=0,
                 random_seed=None):
        """Build the graph and session for a sparse-input logistic model.

        Args:
            input_dim: first dimension of the weight matrix ``w``.
            output_dim: second dimension of ``w`` and length of the bias ``b``.
            init_path: passed to ``utils.init_var_map`` together with the
                variable specs.
            opt_algo: optimizer name passed to ``utils.get_optimizer``.
            learning_rate: learning rate passed to ``utils.get_optimizer``.
            l2_weight: coefficient of the L2 term added to the loss.
            random_seed: if not None, used as the graph-level random seed.
        """
        Model.__init__(self)
        # Variable specs: (name, shape, init method, dtype).
        weight_spec = ('w', [input_dim, output_dim], 'xavier', dtype)
        bias_spec = ('b', [output_dim], 'zero', dtype)
        init_vars = [weight_spec, bias_spec]

        self.graph = tf.Graph()
        with self.graph.as_default():
            if random_seed is not None:
                tf.set_random_seed(random_seed)
            self.X = tf.sparse_placeholder(dtype)
            self.y = tf.placeholder(dtype)
            # Initialize the variables w and b.
            self.vars = utils.init_var_map(init_vars, init_path)

            weights = self.vars['w']
            bias = self.vars['b']
            linear = tf.sparse_tensor_dense_matmul(self.X, weights)
            logits = tf.reshape(linear + bias, [-1])
            self.y_prob = tf.sigmoid(logits)

            per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                labels=self.y, logits=logits)
            data_loss = tf.reduce_mean(per_example_loss)
            # NOTE(review): the L2 term is computed on X.w, not on w itself --
            # confirm this is intended.
            l2_penalty = l2_weight * tf.nn.l2_loss(linear)
            self.loss = data_loss + l2_penalty
            self.optimizer = utils.get_optimizer(opt_algo, learning_rate, self.loss)

            # Let the session grow its GPU memory on demand.
            session_config = tf.ConfigProto()
            session_config.gpu_options.allow_growth = True
            self.sess = tf.Session(config=session_config)
            tf.global_variables_initializer().run(session=self.sess)
Exemplo n.º 22
0
  def testGradientsAsVariables(self):
    """Applies SGD using gradients that were first copied into tf.Variables."""
    for dtype in (tf.half, tf.float32, tf.float64):
      with self.test_session() as sess:
        var0 = tf.Variable([1.0, 2.0], dtype=dtype)
        var1 = tf.Variable([3.0, 4.0], dtype=dtype)
        cost = 5 * var0 + 3 * var1
        global_step = tf.Variable(tf.zeros([], tf.int64), name='global_step')
        optimizer = tf.train.GradientDescentOptimizer(3.0)
        grads_and_vars = optimizer.compute_gradients(cost, [var0, var1])

        # One zero-initialized variable per gradient, plus an op that copies
        # the computed gradient into it.
        grad_holders = [
            tf.Variable(tf.zeros([2], dtype)) for _ in grads_and_vars
        ]
        copy_grad_ops = [
            tf.assign(holder, grad)
            for holder, (grad, _) in zip(grad_holders, grads_and_vars)
        ]

        held_grads_and_vars = list(zip(grad_holders, [var0, var1]))
        apply_op = optimizer.apply_gradients(held_grads_and_vars, global_step)

        tf.global_variables_initializer().run()
        # Fill the holder variables with the actual gradients.
        sess.run(copy_grad_ops)
        # The parameters still hold their initial values.
        self.assertAllClose([1.0, 2.0], var0.eval())
        self.assertAllClose([3.0, 4.0], var1.eval())
        # Take one SGD step using the held gradients.
        apply_op.run()
        # var0: x - 3.0 * 5; var1: x - 3.0 * 3.
        self.assertAllClose([-14., -13.], var0.eval())
        self.assertAllClose([-6., -5.], var1.eval())
Exemplo n.º 23
0
  def testDenseFeaturesSeparableWithinMargins(self):
    """Hinge loss on two dense examples that are separable but inside the margin."""
    with self._single_threaded_test_session():
      examples, variables = make_dense_examples_and_variables_dicts(
          dense_features_values=[[[1.0, 0.5], [1.0, -0.5]]],
          weights=[1.0, 1.0],
          labels=[1.0, 0.0])
      sdca_options = {
          'symmetric_l2_regularization': 1.0,
          'symmetric_l1_regularization': 0,
          'loss_type': 'hinge_loss',
      }
      model = SdcaModel(examples, variables, sdca_options)
      tf.global_variables_initializer().run()
      predictions = model.predictions(examples)
      binary_predictions = get_binary_predictions_for_hinge(predictions)

      minimize_op = model.minimize()
      for _ in range(_MAX_ITERATIONS):
        minimize_op.run()
      model.update_weights(minimize_op).run()

      # (1.0, 0.5) and (1.0, -0.5) can be separated by the x-axis, but both
      # points lie within the margins, so each contributes an unregularized
      # loss of 1/2. The optimal weights are w_1~=0.0 and w_2~=1.0, which
      # gives an L2 loss of ~0.25.
      self.assertAllClose([0.5, -0.5], predictions.eval(), rtol=0.05)
      self.assertAllEqual([1, 0], binary_predictions.eval())
      unregularized_loss = model.unregularized_loss(examples)
      regularized_loss = model.regularized_loss(examples)
      self.assertAllClose(0.5, unregularized_loss.eval(), atol=0.02)
      self.assertAllClose(0.75, regularized_loss.eval(), atol=0.02)
Exemplo n.º 24
0
  def testDenseFeaturesWithArbitraryWeights(self):
    """Squared loss on two dense examples with unequal example weights."""
    with self._single_threaded_test_session():
      examples, variables = make_dense_examples_and_variables_dicts(
          dense_features_values=[[[1.0, 0.0], [0.0, 1.0]]],
          weights=[20.0, 10.0],
          labels=[10.0, -5.0])
      sdca_options = {
          'symmetric_l2_regularization': 5.0,
          'symmetric_l1_regularization': 0,
          'loss_type': 'squared_loss',
      }
      model = SdcaModel(examples, variables, sdca_options)
      tf.global_variables_initializer().run()
      predictions = model.predictions(examples)

      minimize_op = model.minimize()
      for _ in range(_MAX_ITERATIONS):
        minimize_op.run()
      model.update_weights(minimize_op).run()

      # The loss function for these particular features is given by:
      # 1/2 s_1 (label_1-w_1)^2 + 1/2 s_2(label_2-w_2)^2 +
      # \lambda/2 (w_1^2 + w_2^2) where s_1, s_2 are the *example weights. It
      # turns out that the optimal (variable) weights are given by:
      # w_1* = label_1 \cdot s_1/(\lambda + s_1)= 8.0 and
      # w_2* =label_2 \cdot s_2/(\lambda + s_2)= -10/3.
      # In this case the (unnormalized regularized) loss will be:
      # s_1/2(8-10)^2 + s_2/2(5-10/3)^2 + 5.0/2(8^2 + (10/3)^2) = 2175.0/9. The
      # actual loss should be further normalized by the sum of example weights.
      expected_predictions = [8.0, -10.0/3]
      self.assertAllClose(expected_predictions,
                          predictions.eval(),
                          rtol=0.01)
      regularized_loss = model.regularized_loss(examples)
      self.assertAllClose(2175.0 / 270.0, regularized_loss.eval(), atol=0.01)
Exemplo n.º 25
0
  def testDenseFeaturesPerfectlySeparable(self):
    """Hinge loss on two dense examples that are perfectly separable."""
    with self._single_threaded_test_session():
      examples, variables = make_dense_examples_and_variables_dicts(
          dense_features_values=[[1.0, 1.0], [1.0, -1.0]],
          weights=[1.0, 1.0],
          labels=[1.0, 0.0])
      sdca_options = {
          'symmetric_l2_regularization': 1.0,
          'symmetric_l1_regularization': 0,
          'loss_type': 'hinge_loss',
      }
      model = SdcaModel(examples, variables, sdca_options)
      tf.global_variables_initializer().run()
      predictions = model.predictions(examples)
      binary_predictions = get_binary_predictions_for_hinge(predictions)

      minimize_op = model.minimize()
      for _ in range(_MAX_ITERATIONS):
        minimize_op.run()
      model.update_weights(minimize_op).run()

      self.assertAllClose([1.0, -1.0], predictions.eval(), atol=0.05)
      self.assertAllEqual([1, 0], binary_predictions.eval())

      # (1.0, 1.0) and (1.0, -1.0) are perfectly separable by the x-axis (the
      # SVM's functional margin is >= 1), so the unregularized loss is ~0.0
      # and only the l2-regularization contributes. For these datapoints
      # w_1~=0.0 and w_2~=1.0, which means the l2 loss is ~0.25.
      unregularized_loss = model.unregularized_loss(examples)
      regularized_loss = model.regularized_loss(examples)
      self.assertAllClose(0.0, unregularized_loss.eval(), atol=0.02)
      self.assertAllClose(0.25, regularized_loss.eval(), atol=0.02)
Exemplo n.º 26
0
  def testL1Regularization(self):
    # Squared-loss SDCA with both an L1 and an L2 penalty on two sparse
    # examples that share no feature ids.
    first = make_example_proto(
        {'age': [0],
         'gender': [0]}, -10.0)
    second = make_example_proto(
        {'age': [1],
         'gender': [1]}, 14.0)
    protos = [first, second]
    unit_weights = [1.0, 1.0]
    sdca_options = {
        'symmetric_l2_regularization': 1.0,
        'symmetric_l1_regularization': 4.0,
        'loss_type': 'squared_loss',
    }
    with self._single_threaded_test_session():
      data = make_example_dict(protos, unit_weights)
      model_vars = make_variable_dict(1, 1)
      regressor = SdcaModel(data, model_vars, sdca_options)
      tf.global_variables_initializer().run()
      predicted = regressor.predictions(data)
      total_loss = regressor.regularized_loss(data)

      sdca_step = regressor.minimize()
      for _ in range(_MAX_ITERATIONS):
        sdca_step.run()
      regressor.update_weights(sdca_step).run()

      # Expected predictions are -4.0 and 20/3. These values (and the loss
      # asserted below) are consistent with each example minimizing
      #   (label - 2 * weight)^2 / 2 + L2 * weight^2 + 2 * L1 * |weight|
      # where both of its feature weights equal `weight`.
      # NOTE(review): objective inferred from the asserted numbers; confirm
      # against SdcaModel.
      self.assertAllClose([-4.0, 20.0 / 3.0], predicted.eval(), rtol=0.08)

      # The loss is the per-example regularized objective at the optimum,
      # summed and then divided by the total example weight (2.0).
      self.assertAllClose(308.0 / 6.0, total_loss.eval(), atol=0.01)
Exemplo n.º 27
0
  def testDenseFeaturesWithDefaultWeights(self):
    # Squared-loss SDCA on two dense features with unit example weights,
    # checked against the closed-form ridge solution.
    sdca_options = {
        'symmetric_l2_regularization': 1.0,
        'symmetric_l1_regularization': 0,
        'loss_type': 'squared_loss',
    }
    with self._single_threaded_test_session():
      data, model_vars = make_dense_examples_and_variables_dicts(
          dense_features_values=[[[1.0], [0.0]], [0.0, 1.0]],
          weights=[1.0, 1.0],
          labels=[10.0, -5.0])
      regressor = SdcaModel(data, model_vars, sdca_options)
      tf.global_variables_initializer().run()
      predicted = regressor.predictions(data)

      sdca_step = regressor.minimize()
      for _ in range(_MAX_ITERATIONS):
        sdca_step.run()
      regressor.update_weights(sdca_step).run()

      # For these features the objective is
      #   1/2 (label_1 - w_1)^2 + 1/2 (label_2 - w_2)^2
      #     + \lambda/2 (w_1^2 + w_2^2).
      # Setting the derivatives wrt w_1 and w_2 to zero gives
      #   w_1* = label_1 / (\lambda + 1) = 10/2,
      #   w_2* = label_2 / (\lambda + 1) = -5/2.
      # The unnormalized regularized loss is then
      #   1/2 (10 - 5)^2 + 1/2 (5 - 5/2)^2 + 1/2 (5^2 + (5/2)^2) = 125.0/4,
      # which is further divided by the sum of example weights (2.0).
      self.assertAllClose([5.0, -2.5],
                          predicted.eval(),
                          rtol=0.01)
      total_loss = regressor.regularized_loss(data)
      self.assertAllClose(125.0 / 8.0, total_loss.eval(), atol=0.01)
Exemplo n.º 28
0
  def testDenseFeaturesWeightedExamples(self):
    # Hinge-loss SDCA where the first example carries three times the weight
    # of the second one.
    sdca_options = {
        'symmetric_l2_regularization': 1.0,
        'symmetric_l1_regularization': 0,
        'loss_type': 'hinge_loss',
    }
    with self._single_threaded_test_session():
      data, model_vars = make_dense_examples_and_variables_dicts(
          dense_features_values=[[[1.0], [1.0]], [[0.5], [-0.5]]],
          weights=[3.0, 1.0],
          labels=[1.0, 0.0])
      svm = SdcaModel(data, model_vars, sdca_options)
      tf.global_variables_initializer().run()
      scores = svm.predictions(data)
      hard_labels = get_binary_predictions_for_hinge(scores)
      sdca_step = svm.minimize()
      for _ in range(_MAX_ITERATIONS):
        sdca_step.run()
      svm.update_weights(sdca_step).run()

      # Point (1.0, 0.5) outweighs (1.0, -0.5), so the model favours a large
      # margin for (1.0, 0.5); because of regularization (1.0, -0.5) ends up
      # inside the margin. The optimum is w_1 ~= 0.4 and w_2 ~= 1.2, i.e. an
      # L2 term of about 0.5 * 0.25 * 1.6 = 0.2.
      # NOTE(review): 0.25 is presumably 1 / (sum of example weights);
      # confirm the normalization against SdcaModel.
      # Both binary predictions are correct, but the boundary lies much
      # closer to the second point than to the first.
      self.assertAllClose([1.0, -0.2], scores.eval(), atol=0.05)
      self.assertAllEqual([1, 0], hard_labels.eval())
      loss_without_reg = svm.unregularized_loss(data)
      loss_with_reg = svm.regularized_loss(data)
      self.assertAllClose(0.2, loss_without_reg.eval(), atol=0.02)
      self.assertAllClose(0.4, loss_with_reg.eval(), atol=0.02)
Exemplo n.º 29
0
  def testFractionalExampleLabel(self):
    # Logistic loss must fail when a label is neither 0.0 nor 1.0; checked
    # for every table-shard setting in _SHARD_NUMBERS.
    fractional = make_example_proto(
        {'age': [0],
         'gender': [0]}, 0.1)
    positive = make_example_proto(
        {'age': [1],
         'gender': [1]}, 1)
    protos = [fractional, positive]
    unit_weights = [1.0, 1.0]
    expected_error = 'Only labels of 0.0 or 1.0 are supported right now.'
    for shard_count in _SHARD_NUMBERS:
      with self._single_threaded_test_session():
        data = make_example_dict(protos, unit_weights)
        model_vars = make_variable_dict(1, 1)
        sdca_options = {
            'symmetric_l2_regularization': 1,
            'symmetric_l1_regularization': 0,
            'num_table_shards': shard_count,
            'loss_type': 'logistic_loss',
        }

        classifier = SdcaModel(data, model_vars, sdca_options)
        tf.global_variables_initializer().run()
        # The error is raised when the training op runs, not when it is built.
        with self.assertRaisesOpError(expected_error):
          classifier.minimize().run()
Exemplo n.º 30
0
 def testBasicLSTMCell(self):
   # Exercises BasicLSTMCell with concatenated (non-tuple) state: first as a
   # two-layer MultiRNNCell, then alone with input_size != num_units.
   with self.test_session() as sess:
     with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
       inputs = tf.zeros([1, 2])
       state = tf.zeros([1, 8])
       # One cell object is used for both layers (same as `[cell] * 2`).
       lstm = tf.nn.rnn_cell.BasicLSTMCell(2, state_is_tuple=False)
       stacked = tf.nn.rnn_cell.MultiRNNCell([lstm, lstm],
                                             state_is_tuple=False)
       output, next_state = stacked(inputs, state)
       sess.run([tf.global_variables_initializer()])
       feeds = {inputs.name: np.array([[1., 1.]]),
                state.name: 0.1 * np.ones([1, 8])}
       fetched = sess.run([output, next_state], feeds)
       self.assertEqual(len(fetched), 2)
       # The expected numbers were not derived by hand; this is a smoke test.
       self.assertAllClose(fetched[0], [[0.24024698, 0.24024698]])
       expected_state = np.array([[0.68967271, 0.68967271,
                                   0.44848421, 0.44848421,
                                   0.39897051, 0.39897051,
                                   0.24024698, 0.24024698]])
       self.assertAllClose(fetched[1], expected_state)
     with tf.variable_scope("other", initializer=tf.constant_initializer(0.5)):
       # BasicLSTMCell with input_size (3) different from num_units (2).
       inputs = tf.zeros([1, 3])
       state = tf.zeros([1, 4])
       single = tf.nn.rnn_cell.BasicLSTMCell(2, state_is_tuple=False)
       output, next_state = single(inputs, state)
       sess.run([tf.global_variables_initializer()])
       feeds = {inputs.name: np.array([[1., 1., 1.]]),
                state.name: 0.1 * np.ones([1, 4])}
       fetched = sess.run([output, next_state], feeds)
       self.assertEqual(len(fetched), 2)
Exemplo n.º 31
0
Arquivo: train.py Projeto: kiminh/AMAD
def run(args):
	"""Train AnomalyNet and score the held-out split after every epoch.

	Loads the dataset named by ``args.input``, keeps the last
	``2 * anomaly_num`` instances for testing and trains on the rest. During
	training each step runs either the generator or the discriminator train
	op. After every epoch the instance-level and block-level scores are
	written to ``args.instance_output + '_<epoch>'`` and
	``args.block_output + '_<epoch>'``, and the AUC values returned by
	``newmetrics.roc`` are printed.

	Args:
		args: parsed command-line arguments. This function reads ``input``,
			``instance_output`` and ``block_output`` and hands the whole
			object to ``Config``.
	"""
	# load configuration
	config = Config(args)
	# load data
	dataset = ld.LoadData(args.input)
	data = dataset.data
	label = dataset.label
	anomaly_num = dataset.anomaly_num
	feature_index = dataset.feature_index
	# NOTE(review): described upstream as the number of unique item ids in a
	# dataset instance - confirm against LoadData.
	feature_item_num = dataset.feature_item_num
	instance_num = len(data)
	# The last 2 * anomaly_num instances are held out for testing.
	split_at = instance_num - 2 * anomaly_num

	# for training
	training_data = ld.get_shaped_data(data[:split_at], config.batch_size, config.block_size, len(data[0]))
	print("----------finish shaping training data!-----------")
	instance_dim = len(training_data[0][0][0])

	# for testing
	testing_data = data[split_at:]
	testing_label = label[split_at:]
	# Re-seed before each shuffle so data and labels get the same permutation.
	randnum = config.seed
	random.seed(randnum)
	random.shuffle(testing_data)
	random.seed(randnum)
	random.shuffle(testing_label)

	testing_data = ld.get_shaped_data(testing_data, config.batch_size, config.block_size, len(data[0]))
	print("----------finish shaping testing data!-----------")
	# Keep only the labels that fill whole (block_size * batch_size) groups.
	# NOTE(review): presumably mirrors the truncation done by get_shaped_data.
	testing_data_num = len(testing_label) - len(testing_label) % (config.block_size * config.batch_size)
	testing_label = testing_label[:testing_data_num]  # instance-level ground truth

	print("training data",training_data.shape,instance_dim)
	print("testing data",testing_data.shape,testing_data_num,testing_data[0].shape)
	print("anomaly_num",anomaly_num)
	print("number of normal data in testing data:",np.sum(testing_label),len(testing_label))
	print("feature_item_num",feature_item_num)
	with tf.Graph().as_default(), tf.Session() as sess:
		# graph settings
		model = AnomalyNet(
			feature_index,
			config.FM_weight_dim,
			feature_item_num,
			config.batch_size,
			config.block_size,
			instance_dim,
			config.attention_dim,
			config.autoencoder_hidden_dim,
			config.lstm_dropout_keep_prob,
			config.lstm_layer_num,
			config.lstm_hidden_size,
			config.is_training,
			config.gan_hidden_dim,
			config.alpha,
			config.beta,
			config.noise,
			config.learning_rate)
		# Kept so the graph still contains the save ops; no checkpoint is
		# written by this function.
		saver = tf.train.Saver(max_to_keep=10)

		sess.run(tf.global_variables_initializer())

		flag = 0
		for epoch in range(config.epoch):
			# training
			for batch_idx, curr_batch in enumerate(training_data):
				flag = flag + 1
				pointer = flag % 100
				feed_dict = {model.data: curr_batch}
				# pointer values below 50 train the generator, all others
				# train the discriminator.
				if pointer < 50:
					sess.run(model.G_train, feed_dict=feed_dict)
				else:
					sess.run(model.D_train, feed_dict=feed_dict)
				if batch_idx % 50 == 0:
					g_loss, d_loss = sess.run((model.generator_loss, model.discriminator_loss), feed_dict=feed_dict)
					print("current epoch %d, in batch %d, current flag is %d, generator average loss %.4f, discriminator average loss %.4f"%(epoch,batch_idx,pointer,g_loss,d_loss))

			# testing: collect per-instance and per-block scores
			instance_loss_list = []
			block_loss_list = []
			for curr_batch in testing_data:
				feed_dict = {model.data: curr_batch}
				instance_loss, block_loss = sess.run((model.instance_total_loss, model.block_total_loss), feed_dict=feed_dict)
				instance_loss_list.extend(instance_loss)
				block_loss_list.extend(block_loss)

			# instance output; the context manager closes the file even if a
			# write fails.
			instance_path = args.instance_output+'_%d'%(epoch)
			with open(instance_path, 'w') as bw:
				bw.write("true pred\n")
				for idx, score in enumerate(instance_loss_list):
					bw.write(str(testing_label[idx])+ " "+str(score)+"\n")

			# block output: a block's ground truth is 1 when the sum of its
			# instance labels reaches block_size * block_ratio, else 0.
			testing_block_num = testing_data_num // config.block_size
			threshold = config.block_size*config.block_ratio
			block_true = []
			for block_idx in range(testing_block_num):
				true_sum = np.sum(testing_label[block_idx*config.block_size:(block_idx+1)*config.block_size])
				block_true.append(0 if true_sum < threshold else 1)

			block_path = args.block_output+'_%d'%(epoch)
			with open(block_path, 'w') as bw:
				bw.write("true pred\n")
				for idx, truth in enumerate(block_true):
					bw.write(str(truth)+ " "+str(block_loss_list[idx])+"\n")

			instance_auc,_,_,_ = newmetrics.roc(testing_label,instance_loss_list,pos_label=0,output_path=instance_path)
			block_auc,_,_,_ = newmetrics.roc(block_true,block_loss_list,pos_label=0,output_path=block_path)
			print('epoch:',epoch," instance level auc: ",instance_auc)
			print('epoch:',epoch," block level auc: ",block_auc)
Exemplo n.º 32
0
def _load_province_image_set(set_name):
    """Load one labelled image set as flat binary rows with one-hot labels.

    Walks ``./train_images/<set_name>/chinese-characters/<label>/`` for every
    label in ``range(NUM_CLASSES_P)``.

    Args:
        set_name: sub-directory of ``./train_images`` to read, for example
            ``'training-set'`` or ``'validation-set'``.

    Returns:
        A tuple ``(images, labels, count)``. ``images`` is a ``(count, SIZE)``
        integer array holding 0 where the pixel value is above 230 and 1
        elsewhere; ``labels`` is a ``(count, NUM_CLASSES_P)`` one-hot integer
        array; ``count`` is the number of image files found.
    """
    # First pass: collect every file with its class label so the arrays can
    # be allocated with the right size.
    labelled_paths = []
    for label in range(NUM_CLASSES_P):
        # Change this to your own image directory; the last path component
        # is the class label.
        class_dir = './train_images/%s/chinese-characters/%s/' % (set_name, label)
        for root, _dirs, files in os.walk(class_dir):
            for filename in files:
                # Join with the walked root so files in nested directories
                # resolve correctly.
                labelled_paths.append((label, os.path.join(root, filename)))

    count = len(labelled_paths)
    images = np.zeros((count, SIZE), dtype=int)
    labels = np.zeros((count, NUM_CLASSES_P), dtype=int)

    # Second pass: binarize the pixels and set the one-hot label.
    # NOTE(review): assumes single-channel images with width * height <= SIZE.
    for index, (label, path) in enumerate(labelled_paths):
        img = Image.open(path)
        width, height = img.size[0], img.size[1]
        for h in range(height):
            for w in range(width):
                # Thresholding this way thins the strokes, which helps
                # recognition accuracy.
                if img.getpixel((w, h)) > 230:
                    images[index][w + h * width] = 0
                else:
                    images[index][w + h * width] = 1
        labels[index][label] = 1
    return images, labels, count


def train_province():
    """Train the province-character CNN and save it under ``SAVER_DIR_P``.

    Loads the training and validation images, builds a network of two
    convolutional layers, one fully connected layer with dropout and a
    readout layer, and trains it with Adam for up to ``iterations_P`` passes.
    Training stops early once validation accuracy reaches 99.5% after at
    least 150 passes. Relies on the module-level ``x``, ``x_image`` and
    ``y_P`` tensors and updates the module-level ``time_begin`` timestamp.
    """
    global time_begin

    input_images, input_labels, input_count = _load_province_image_set(
        'training-set')
    val_images, val_labels, _ = _load_province_image_set('validation-set')

    with tf.Session() as sess:
        # First convolutional layer
        W_conv1 = tf.Variable(tf.truncated_normal([8, 8, 1, 16], stddev=0.1),
                              name="W_conv1")
        b_conv1 = tf.Variable(tf.constant(0.1, shape=[16]), name="b_conv1")
        conv_strides = [1, 1, 1, 1]
        kernel_size = [1, 2, 2, 1]
        pool_strides = [1, 2, 2, 1]
        L1_pool = conv_layer(x_image,
                             W_conv1,
                             b_conv1,
                             conv_strides,
                             kernel_size,
                             pool_strides,
                             padding='SAME')

        # Second convolutional layer
        W_conv2 = tf.Variable(tf.truncated_normal([5, 5, 16, 32], stddev=0.1),
                              name="W_conv2")
        b_conv2 = tf.Variable(tf.constant(0.1, shape=[32]), name="b_conv2")
        conv_strides = [1, 1, 1, 1]
        kernel_size = [1, 1, 1, 1]
        pool_strides = [1, 1, 1, 1]
        L2_pool = conv_layer(L1_pool,
                             W_conv2,
                             b_conv2,
                             conv_strides,
                             kernel_size,
                             pool_strides,
                             padding='SAME')

        # Fully connected layer
        W_fc1 = tf.Variable(tf.truncated_normal([16 * 20 * 32, 512],
                                                stddev=0.1),
                            name="W_fc1")
        b_fc1 = tf.Variable(tf.constant(0.1, shape=[512]), name="b_fc1")
        h_pool2_flat = tf.reshape(L2_pool, [-1, 16 * 20 * 32])
        h_fc1 = full_connect(h_pool2_flat, W_fc1, b_fc1)

        # dropout
        keep_prob = tf.placeholder(tf.float32)

        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

        # Readout layer
        W_fc2 = tf.Variable(tf.truncated_normal([512, NUM_CLASSES_P],
                                                stddev=0.1),
                            name="W_fc2")
        b_fc2 = tf.Variable(tf.constant(0.1, shape=[NUM_CLASSES_P]),
                            name="b_fc2")

        # Define the loss, the optimizer and the training op
        y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=y_P, logits=y_conv))
        train_step = tf.train.AdamOptimizer((1e-4)).minimize(cross_entropy)

        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_P, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # Create the saver
        saver = tf.train.Saver()

        sess.run(tf.global_variables_initializer())

        time_elapsed = time.time() - time_begin
        print("读取图片文件耗费时间:%d秒" % time_elapsed)
        time_begin = time.time()

        print("一共读取了 %s 个训练图像, %s 个标签" % (input_count, input_count))

        # Split the data into full batches plus one shorter final batch so
        # any number of images is supported: with batch_size 60 and 150
        # images, two batches of 60 are followed by one batch of 30.
        batch_size = 60
        batches_count, remainder = divmod(input_count, batch_size)
        print("训练数据集分成 %s 批, 前面每批 %s 个数据,最后一批 %s 个数据" %
              (batches_count + 1, batch_size, remainder))

        # Run the training iterations
        for it in range(iterations_P):
            for n in range(batches_count):
                batch = slice(n * batch_size, (n + 1) * batch_size)
                train_step.run(
                    feed_dict={
                        x: input_images[batch],
                        y_P: input_labels[batch],
                        keep_prob: 0.5
                    })
            if remainder > 0:
                # The final slice runs to input_count so the last sample is
                # trained on and the batch is never empty.
                tail = slice(batches_count * batch_size, input_count)
                train_step.run(
                    feed_dict={
                        x: input_images[tail],
                        y_P: input_labels[tail],
                        keep_prob: 0.5
                    })

            # Every five iterations, measure validation accuracy and stop
            # once it reaches 99.5% after at least 150 iterations.
            if it % 5 == 0:
                iterate_accuracy = accuracy.eval(feed_dict={
                    x: val_images,
                    y_P: val_labels,
                    keep_prob: 1.0
                })
                print('第 %d 次训练迭代: 准确率 %0.5f%%' % (it, iterate_accuracy * 100))
                if iterate_accuracy >= 0.995 and it >= 150:
                    break

        print('完成训练!')
        time_elapsed = time.time() - time_begin
        print("训练耗费时间:%d秒" % time_elapsed)
        time_begin = time.time()

        # Save the trained model
        if not os.path.exists(SAVER_DIR_P):
            print('不存在训练数据保存目录,现在创建保存目录')
            os.makedirs(SAVER_DIR_P)
        saver.save(sess, "%smodel.ckpt" % (SAVER_DIR_P))
Exemplo n.º 33
0
    def _fit_cdr(self):
        """Train the CDR model in a TensorFlow session and keep its factors.

        Builds ``Model`` from this object's hyper-parameters, runs
        ``self.max_iter`` passes over ``self.train_set.uij_iter`` and stores
        the resulting ``model.U`` and ``model.V`` values in ``self.U`` and
        ``self.V``. The default graph is reset afterwards.
        """
        import tensorflow as tf
        from .model import Model

        num_users = self.train_set.num_users
        num_items = self.train_set.num_items

        # Bag-of-words features for every item, min-max scaled.
        bow = self.train_set.item_text.batch_bow(np.arange(num_items))
        lowest, highest = bow.min(), bow.max()
        bow = (bow - lowest) / (highest - lowest)

        # Layer widths: vocab, hidden sizes, k, hidden sizes again, vocab.
        hidden = self.autoencoder_structure
        sdae_layers = (
            [self.vocab_size] + hidden + [self.k] + hidden + [self.vocab_size]
        )

        tf.set_random_seed(self.seed)
        model = Model(
            n_users=num_users,
            n_items=num_items,
            n_vocab=self.vocab_size,
            k=self.k,
            layers=sdae_layers,
            lambda_u=self.lambda_u,
            lambda_v=self.lambda_v,
            lambda_w=self.lambda_w,
            lambda_n=self.lambda_n,
            lr=self.learning_rate,
            dropout_rate=self.dropout_rate,
            U=self.U,
            V=self.V,
            act_fn=self.act_fn,
            seed=self.seed,
        )

        # Let the session grow its GPU memory on demand.
        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True
        with tf.Session(config=session_config) as sess:
            sess.run(tf.global_variables_initializer())

            progress = trange(self.max_iter, disable=not self.verbose)
            for _ in progress:
                # A fresh mask is drawn for every pass; entries are 1 with
                # probability 1 - corruption_rate.
                keep_mask = self.rng.binomial(
                    1, 1 - self.corruption_rate, (num_items, self.vocab_size)
                )
                running_loss = 0
                seen = 0
                batches = self.train_set.uij_iter(
                    batch_size=self.batch_size, shuffle=True
                )
                for step, (batch_u, batch_i, batch_j) in enumerate(batches, start=1):
                    feed_dict = {
                        model.mask_input: keep_mask[batch_i, :],
                        model.text_input: bow[batch_i, :],
                        model.batch_u: batch_u,
                        model.batch_i: batch_i,
                        model.batch_j: batch_j,
                    }

                    # First update U and V, then the SDAE.
                    sess.run(model.opt1, feed_dict)
                    _, batch_loss = sess.run([model.opt2, model.loss], feed_dict)

                    running_loss += batch_loss
                    seen += len(batch_u)
                    # Refresh the progress-bar loss every tenth batch.
                    if step % 10 == 0:
                        progress.set_postfix(loss=(running_loss / seen))

            self.U, self.V = sess.run([model.U, model.V])

        tf.reset_default_graph()

        if self.verbose:
            print("\nLearning completed")
Exemplo n.º 34
0
    def train(self, trainX=None, trainy=None, valX=None, valy=None):
        """Train the DNN-based malware detector and checkpoint the best model.

        Every 50 steps the model is evaluated on the validation data, a
        summary is written, and a checkpoint is saved only when the
        validation accuracy beats the best value seen so far.

        Args:
            trainX: training features. When ``trainX`` and ``trainy`` are
                both None, the training and validation splits are loaded
                with ``utils.read_joblib`` from the paths configured under
                ``'feature.' + self.feature_tp``.
            trainy: training labels.
            valX: validation features.
            valy: validation labels.
        """
        if trainX is None and trainy is None:
            trainX, valX, _ = utils.read_joblib(
                config.get('feature.' + self.feature_tp, 'dataX'))
            trainy, valy, _ = utils.read_joblib(
                config.get('feature.' + self.feature_tp, 'datay'))

        train_input_supervised = utils.DataProducer(
            trainX,
            trainy,
            self.hp_params.batch_size,
            n_epochs=self.hp_params.n_epochs)
        val_input = utils.DataProducer(valX,
                                       valy,
                                       self.hp_params.batch_size,
                                       name='test')

        global_train_step = tf.train.get_or_create_global_step()
        saver = tf.train.Saver()
        tf.summary.scalar('accuracy', self.accuracy)
        tf.summary.scalar('loss', self.cross_entropy)
        merged_summaries = tf.summary.merge_all()

        # optimizer; run the UPDATE_OPS collection before each step
        with tf.control_dependencies(tf.get_collection(
                tf.GraphKeys.UPDATE_OPS)):
            optimizer = tf.train.AdamOptimizer(
                self.hp_params.learning_rate).minimize(
                    self.cross_entropy, global_step=global_train_step)
        tf_cfg = tf.ConfigProto(log_device_placement=True,
                                allow_soft_placement=True)
        tf_cfg.gpu_options.allow_growth = True
        tf_cfg.gpu_options.per_process_gpu_memory_fraction = 1.
        sess = tf.Session(config=tf_cfg)
        summary_writer = None

        try:
            with sess.as_default():
                summary_writer = tf.summary.FileWriter(self.save_dir,
                                                       sess.graph)
                sess.run(tf.global_variables_initializer())

                training_time = 0.0
                train_input_supervised.reset_cursor()
                output_steps = 50
                best_val_acc = 0.
                for step_idx, X_batch, y_batch in \
                        train_input_supervised.next_batch():
                    train_dict = {
                        self.x_input: X_batch,
                        self.y_input: y_batch,
                        self.is_training: True
                    }

                    if (step_idx + 1) % output_steps == 0:
                        print('Step {}/{}:{}'.format(
                            step_idx + 1, train_input_supervised.steps,
                            datetime.now()))
                        val_input.reset_cursor()
                        val_accs = [
                            sess.run(self.accuracy,
                                     feed_dict={self.x_input: valX_batch,
                                                self.y_input: valy_batch,
                                                self.is_training: False})
                            for _, valX_batch, valy_batch
                            in val_input.next_batch()
                        ]
                        _acc = np.mean(val_accs)
                        print('    validation accuracy {:.5}%'.format(
                            _acc * 100))
                        if step_idx != 0:
                            print('    {} samples per second'.format(
                                output_steps * self.hp_params.batch_size /
                                training_time))
                            training_time = 0.

                        summary = sess.run(merged_summaries,
                                           feed_dict=train_dict)
                        summary_writer.add_summary(
                            summary, global_train_step.eval(sess))

                        if best_val_acc < _acc:
                            # Record the new best so later, worse models do
                            # not overwrite this checkpoint.
                            best_val_acc = _acc
                            if not os.path.exists(self.save_dir):
                                os.makedirs(self.save_dir)
                            saver.save(sess,
                                       os.path.join(self.save_dir,
                                                    'checkpoint'),
                                       global_step=global_train_step)

                    # Only the optimizer step is timed.
                    start = default_timer()
                    sess.run(optimizer, feed_dict=train_dict)
                    end = default_timer()
                    training_time = training_time + end - start
        finally:
            # Release the writer and the session even if training raised.
            if summary_writer is not None:
                summary_writer.close()
            sess.close()
Exemplo n.º 35
0
# Tail of a training script: fully connected layers, loss, optimizer and the
# training loop. h_pool2_flat, W_fc1, b_fc1, keep_prob, xs, ys, mnist,
# weight_variable, bias_variable and compute_accuracy are not defined in this
# fragment and must already exist.

# Hidden fully connected layer with ReLU, followed by dropout.
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

## fc2 layer ##
# NOTE(review): weight_variable/bias_variable presumably create variables of
# the given shapes ([1024, 10] weights, [10] biases) - confirm.
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
prediction = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)


# the error between prediction and real data: cross-entropy summed over
# axis 1 and averaged over the batch.
# NOTE(review): tf.log(prediction) is -inf wherever a softmax output
# underflows to 0, which can turn the loss into NaN; consider clipping the
# probabilities or using a fused softmax cross-entropy op.
cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys * tf.log(prediction),
                                              reduction_indices=[1]))       # loss
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

sess = tf.Session()
# important step
# tf.initialize_all_variables() is no longer valid from
# 2017-03-02 if using tensorflow >= 0.12, so it is only used when the major
# version is below 1 and the minor version is below 12.
if int((tf.__version__).split('.')[1]) < 12 and int((tf.__version__).split('.')[0]) < 1:
    init = tf.initialize_all_variables()
else:
    init = tf.global_variables_initializer()
sess.run(init)

# 1000 training steps on batches of 100 with dropout keep probability 0.5;
# the result of compute_accuracy on the test set is printed every 50 steps.
for i in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={xs: batch_xs, ys: batch_ys, keep_prob: 0.5})
    if i % 50 == 0:
        print(compute_accuracy(
            mnist.test.images, mnist.test.labels))
Exemplo n.º 36
0
import tensorflow as tf

# Create the two constant inputs on the CPU.
with tf.device('/cpu:0'):
    a = tf.constant([1.0, 2.0, 3.0], shape=[3], name='a')
    b = tf.constant([1.0, 2.0, 3.0], shape=[3], name='b')
# Request the addition on the second GPU.
with tf.device('/gpu:1'):
    c = a + b

# Note: allow_soft_placement=True lets TensorFlow pick the device itself;
# without it this script raises an error. Not every operation can be placed
# on a GPU, and forcing an op that cannot run there onto a GPU fails.
# log_device_placement=True logs the device chosen for each op.
# Without soft placement the config would be:
#   tf.ConfigProto(log_device_placement=True)
session_config = tf.ConfigProto(allow_soft_placement=True,
                                log_device_placement=True)
sess = tf.Session(config=session_config)
init_op = tf.global_variables_initializer()
sess.run(init_op)
print(sess.run(c))
Exemplo n.º 37
0
def train():
    """Build the training graph and run the train/eval loop for MAX_EPOCH epochs.

    The placeholders, model, loss, accuracy, optimizer and saver are created
    on the GPU selected by GPU_INDEX.  Every epoch calls ``train_one_epoch``
    and ``eval_one_epoch`` with the shared ``ops`` dict, and a checkpoint is
    written to ``LOG_DIR/model.ckpt`` on every 10th epoch (including epoch 0).

    The session and both summary writers are closed when the loop finishes or
    fails, so buffered summary events are flushed to disk.

    Raises:
        ValueError: if OPTIMIZER is neither 'momentum' nor 'adam'.
    """
    with tf.Graph().as_default():
        with tf.device('/gpu:'+str(GPU_INDEX)):
            pointclouds_pl, labels_pl = placeholder_inputs(BATCH_SIZE, NUM_POINT)
            is_training_pl = tf.placeholder(tf.bool, shape=())

            # Note the global_step=batch parameter to minimize.
            # That tells the optimizer to helpfully increment the 'batch'
            # parameter for you every time it trains.
            batch = tf.Variable(0)
            bn_decay = get_bn_decay(batch)
            tf.summary.scalar('bn_decay', bn_decay)

            # Get model and loss
            pred = get_model(pointclouds_pl, is_training_pl, bn_decay=bn_decay)
            loss = get_loss(pred, labels_pl)
            tf.summary.scalar('loss', loss)

            # Fraction of matching predictions, normalised by the nominal
            # BATCH_SIZE * NUM_POINT element count.
            correct = tf.equal(tf.argmax(pred, 2), tf.to_int64(labels_pl))
            accuracy = tf.reduce_sum(tf.cast(correct, tf.float32)) / float(BATCH_SIZE*NUM_POINT)
            tf.summary.scalar('accuracy', accuracy)

            # Get training operator
            learning_rate = get_learning_rate(batch)
            tf.summary.scalar('learning_rate', learning_rate)
            if OPTIMIZER == 'momentum':
                optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=MOMENTUM)
            elif OPTIMIZER == 'adam':
                optimizer = tf.train.AdamOptimizer(learning_rate)
            else:
                # Previously an unknown value left `optimizer` unbound and
                # surfaced as an opaque NameError on the next line.
                raise ValueError(
                    "Unsupported OPTIMIZER %r; expected 'momentum' or 'adam'"
                    % (OPTIMIZER,))
            train_op = optimizer.minimize(loss, global_step=batch)

            # Add ops to save and restore all the variables.
            saver = tf.train.Saver()

        # Create a session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = True
        sess = tf.Session(config=config)

        train_writer = None
        test_writer = None
        try:
            # Add summary writers
            merged = tf.summary.merge_all()
            train_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'train'),
                                                 sess.graph)
            test_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'test'))

            # Init variables
            init = tf.global_variables_initializer()
            sess.run(init, {is_training_pl: True})

            # Handles shared with train_one_epoch / eval_one_epoch.
            ops = {'pointclouds_pl': pointclouds_pl,
                   'labels_pl': labels_pl,
                   'is_training_pl': is_training_pl,
                   'pred': pred,
                   'loss': loss,
                   'train_op': train_op,
                   'merged': merged,
                   'step': batch}

            for epoch in range(MAX_EPOCH):
                log_string('**** EPOCH %03d ****' % (epoch))
                sys.stdout.flush()

                train_one_epoch(sess, ops, train_writer)
                eval_one_epoch(sess, ops, test_writer)

                # Save the variables to disk.
                if epoch % 10 == 0:
                    save_path = saver.save(sess, os.path.join(LOG_DIR, "model.ckpt"))
                    log_string("Model saved in file: %s" % save_path)
        finally:
            # Flush pending summaries and release the session's resources.
            for writer in (train_writer, test_writer):
                if writer is not None:
                    writer.close()
            sess.close()
Exemplo n.º 38
0
  def __init__(self, files, img_length, num_colors, d_sizes, g_sizes):
    """Build the GAN graph, its costs and optimizers, and start a session.

    Args:
      files: image file paths; they feed a filename queue and ``len(files)``
        is kept as ``self.N``.
      img_length: stored on the instance and forwarded to ``preprocessing``.
      num_colors: stored on the instance and forwarded to ``preprocessing``.
      d_sizes: discriminator configuration, passed to
        ``build_discriminator`` unchanged.
      g_sizes: generator configuration, passed to ``build_generator``;
        ``g_sizes['z']`` is used as the latent dimensionality.

    Side effects:
      Creates a ``tf.InteractiveSession`` (``self.sess``) and runs the global
      variable initializer in it.
    """

    # save for later
    self.N = len(files)
    self.img_length = img_length
    self.num_colors = num_colors
    self.latent_dims = g_sizes['z']

    # define the input data
    self.Z = tf.placeholder(
      tf.float32,
      shape=(None, self.latent_dims),
      name='Z'
    )

    # note: by making batch_sz a placeholder, we can specify a variable
    # number of samples in the FS-conv operation where we are required
    # to pass in output_shape
    # we need only pass in the batch size via feed_dict
    self.batch_sz = tf.placeholder(tf.int32, shape=(), name='batch_sz')

    # input pipeline: whole-file reader -> preprocessing -> shuffled batches
    # (the record key returned by the reader is not needed)
    filename_queue = tf.train.string_input_producer(files)
    reader = tf.WholeFileReader()
    _, value = reader.read(filename_queue)
    images = preprocessing(value, self.num_colors, self.img_length)
    self.X = tf.train.shuffle_batch([images],
                                    batch_size=self.batch_sz,
                                    capacity=2000,
                                    allow_smaller_final_batch=True,
                                    min_after_dequeue=50)

    # build the discriminator
    logits = self.build_discriminator(self.X, d_sizes)

    # build generator
    self.sample_images = self.build_generator(self.Z, g_sizes)

    # get sample logits
    with tf.variable_scope("discriminator") as scope:
      scope.reuse_variables()
      sample_logits = self.d_forward(self.sample_images, True)

    # get sample images for test time (batch norm is different)
    with tf.variable_scope("generator") as scope:
      scope.reuse_variables()
      self.sample_images_test = self.g_forward(
        self.Z, reuse=True, is_training=False
      )

    # build costs
    self.d_cost_real = tf.nn.sigmoid_cross_entropy_with_logits(
      logits=logits,
      labels=tf.ones_like(logits)
    )
    self.d_cost_fake = tf.nn.sigmoid_cross_entropy_with_logits(
      logits=sample_logits,
      labels=tf.zeros_like(sample_logits)
    )
    self.d_cost = tf.reduce_mean(self.d_cost_real) + tf.reduce_mean(self.d_cost_fake)
    self.g_cost = tf.reduce_mean(
      tf.nn.sigmoid_cross_entropy_with_logits(
        logits=sample_logits,
        labels=tf.ones_like(sample_logits)
      )
    )

    # discriminator accuracy: real samples should score > 0, fakes < 0
    real_predictions = tf.cast(logits > 0, tf.float32)
    fake_predictions = tf.cast(sample_logits < 0, tf.float32)
    # Count the predictions actually made in this run instead of assuming a
    # fixed 2 * BATCH_SIZE: the batch size is fed at run time and the final
    # batch may be smaller, which would otherwise understate the accuracy.
    num_predictions = tf.cast(
      tf.size(logits) + tf.size(sample_logits), tf.float32
    )
    num_correct = tf.reduce_sum(real_predictions) + tf.reduce_sum(fake_predictions)
    self.d_accuracy = num_correct / num_predictions

    # optimizers
    # variables are split by the first letter of their name ('d...' / 'g...')
    self.d_params = [t for t in tf.trainable_variables() if t.name.startswith('d')]
    self.g_params = [t for t in tf.trainable_variables() if t.name.startswith('g')]

    self.d_train_op = tf.train.AdamOptimizer(
      LEARNING_RATE, beta1=BETA1
    ).minimize(
      self.d_cost, var_list=self.d_params
    )
    self.g_train_op = tf.train.AdamOptimizer(
      LEARNING_RATE, beta1=BETA1
    ).minimize(
      self.g_cost, var_list=self.g_params
    )

    # show_all_variables()
    # exit()

    # set up session and variables for later
    self.init_op = tf.global_variables_initializer()
    self.sess = tf.InteractiveSession()
    self.sess.run(self.init_op)
Exemplo n.º 39
0
def _prepare_input_images(batch_x_np):
    """Scale a raw image batch the way FLAGS.normalize_img selects."""
    if FLAGS.normalize_img:
        # Divide by 255 and centre around zero.
        return batch_x_np / 255.0 - 0.5
    # Otherwise only shift by 128 (done in place on the given batch).
    batch_x_np -= 128.0
    return batch_x_np


def _print_weight_means(sess):
    """Print the mean of every trainable variable as a check after loading."""
    for variable in tf.trainable_variables():
        with tf.variable_scope('', reuse=True):
            var = tf.get_variable(variable.name.split(':0')[0])
            print(variable.name, np.mean(sess.run(var)))


def _heatmap_to_rgb(heatmaps):
    """Turn one sample's joint heatmaps into an input-sized 3-channel max map."""
    joint_maps = heatmaps[:, :, 0:FLAGS.num_of_joints].reshape(
        (FLAGS.heatmap_size, FLAGS.heatmap_size, FLAGS.num_of_joints))
    joint_maps = cv2.resize(joint_maps, (FLAGS.input_size, FLAGS.input_size))
    # Collapse the joint axis, then replicate to three channels for display.
    max_map = np.amax(joint_maps, axis=2)
    max_map = np.reshape(max_map, (FLAGS.input_size, FLAGS.input_size, 1))
    return np.repeat(max_map, 3, axis=2)


def _show_heatmap_preview(batch_x_np, batch_gt_heatmap_np, stage_heatmap_np):
    """Display predicted and ground-truth heatmaps for the first batch sample.

    Raises:
        ValueError: if FLAGS.color_channel is neither 'GRAY' nor 'RGB'.
    """
    if FLAGS.color_channel == 'GRAY':
        demo_img = np.repeat(batch_x_np[0], 3, axis=2)
    elif FLAGS.color_channel == 'RGB':
        demo_img = batch_x_np[0]
    else:
        raise ValueError('Non support image type.')

    # Undo the input scaling so the image is in the 0..1 display range.
    # (The RGB / non-normalized path used to read demo_img before it was
    # assigned and crashed with UnboundLocalError.)
    if FLAGS.normalize_img:
        demo_img = demo_img + 0.5
    else:
        demo_img = (demo_img + 128.0) / 255.0

    demo_stage_heatmaps = [
        _heatmap_to_rgb(stage_heatmap_np[stage][0])
        for stage in range(FLAGS.cpm_stages)
    ]
    demo_gt_heatmap = _heatmap_to_rgb(batch_gt_heatmap_np[0])

    # demo_img is already in 0..1 here, so it is blended directly; dividing by
    # 255 a second time (as before) made the image practically invisible.
    blend_img = 0.5 * demo_img + 0.5 * demo_gt_heatmap
    summary_row = np.concatenate(
        (demo_stage_heatmaps[FLAGS.cpm_stages - 1], demo_gt_heatmap,
         blend_img),
        axis=1)

    if FLAGS.cpm_stages >= 4:
        # With enough stages, also show the first three stage outputs on top.
        upper_img = np.concatenate(
            (demo_stage_heatmaps[0], demo_stage_heatmaps[1],
             demo_stage_heatmaps[2]),
            axis=1)
        canvas = np.concatenate((upper_img, summary_row), axis=0)
    else:
        canvas = summary_row

    cv2.imshow('current heatmap', (canvas * 255).astype(np.uint8))
    cv2.waitKey(1000)


def train():
    """Train the CPM model configured through FLAGS.

    Steps: create the weight/log directories, build the DataGenerator and its
    train/valid batch generators, build the model graph and loss, optionally
    restore pretrained weights (a ``.pkl`` file or a checkpoint directory),
    then run up to FLAGS.training_iters iterations with periodic preview,
    validation and checkpointing.

    Raises:
        ValueError: if FLAGS.pretrained_model names a directory without a
            checkpoint state, or (when previews are enabled) if
            FLAGS.color_channel is unsupported.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_id

    # Step 1: Create dirs for saving models and logs
    model_path_suffix = os.path.join(
        FLAGS.network_def, 'input_{}_output_{}'.format(FLAGS.input_size,
                                                       FLAGS.heatmap_size),
        'joints_{}'.format(FLAGS.num_of_joints),
        'stages_{}'.format(FLAGS.cpm_stages),
        'init_{}_rate_{}_step_{}'.format(FLAGS.init_lr, FLAGS.lr_decay_rate,
                                         FLAGS.lr_decay_step))
    model_save_dir = os.path.join('logs_and_weights_add_occlusion_and_fpn',
                                  'models', 'weights', model_path_suffix)
    train_log_save_dir = os.path.join('logs_and_weights_add_occlusion_and_fpn',
                                      'models', 'logs', model_path_suffix,
                                      'train')
    test_log_save_dir = os.path.join('logs_and_weights_add_occlusion_and_fpn',
                                     'models', 'logs', model_path_suffix,
                                     'test')
    # os.makedirs instead of shelling out to `mkdir -p`: portable, and safe
    # for paths containing spaces or shell metacharacters.
    for directory in (model_save_dir, train_log_save_dir, test_log_save_dir):
        os.makedirs(directory, exist_ok=True)

    # Step 2: Create dataset and data generator
    print('--Parsing Config File')
    params = process_config(datagenerator_config_file)
    print('--Creating Dataset')
    dataset = DataGenerator(
        params['total_joints_list'], params['blouse_joints_list'],
        params['dress_joints_list'], params['outwear_joints_list'],
        params['skirt_joints_list'], params['trousers_joints_list'],
        params['blouse_index'], params['dress_index'], params['outwear_index'],
        params['skirt_index'], params['trousers_index'],
        params['img_directory'], params['training_data_file'])
    dataset.generate_set(rand=True, validationRate=0.15)

    generator = dataset.generator(batchSize=FLAGS.batch_size,
                                  norm=False,
                                  sample='train')
    generator_eval = dataset.generator(batchSize=FLAGS.batch_size,
                                       norm=False,
                                       sample='valid')

    # Step 3: Build network graph
    model = cpm_model.CPM_Model(total_num=FLAGS.total_num,
                                input_size=FLAGS.input_size,
                                heatmap_size=FLAGS.heatmap_size,
                                batch_size=FLAGS.batch_size,
                                stages=FLAGS.cpm_stages,
                                num_joints=FLAGS.num_of_joints,
                                img_type=FLAGS.color_channel,
                                is_training=True)
    # model.build_loss(FLAGS.init_lr, FLAGS.lr_decay_rate, FLAGS.lr_decay_step, optimizer='Adam')
    model.build_loss3(optimizer='Adam')
    print('=====Model Build=====\n')

    merged_summary = tf.summary.merge_all()

    # Checkpoints are written as <model_save_dir>/<network_def minus '.py'>.
    checkpoint_prefix = model_save_dir + '/' + FLAGS.network_def.split('.py')[0]
    num_val_batches = 10

    # Step 4: Training
    device_count = {'GPU': 1} if FLAGS.use_gpu else {'GPU': 0}
    with tf.Session(config=tf.ConfigProto(device_count=device_count,
                                          allow_soft_placement=True)) as sess:
        # Create tensorboard
        train_writer = tf.summary.FileWriter(train_log_save_dir, sess.graph)
        test_writer = tf.summary.FileWriter(test_log_save_dir, sess.graph)

        # Create model saver
        saver = tf.train.Saver(max_to_keep=None)

        # Init all vars
        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        # Restore pretrained weights
        if FLAGS.pretrained_model != '':
            if FLAGS.pretrained_model.endswith('.pkl'):
                model.load_weights_from_file(FLAGS.pretrained_model,
                                             sess,
                                             finetune=True)
            else:
                checkpoint = tf.train.get_checkpoint_state(
                    FLAGS.pretrained_model)
                if checkpoint is None:
                    raise ValueError(
                        'No checkpoint state found in {}'.format(
                            FLAGS.pretrained_model))
                # Get the most recently saved model checkpoint file
                ckpt = checkpoint.model_checkpoint_path
                saver.restore(sess, ckpt)
            # Check weights
            _print_weight_means(sess)

        for training_itr in range(FLAGS.training_iters):
            t1 = time.time()

            # Key modification point: this is where DataGenerator is used.
            # Read one batch data
            batch_x_np, batch_gt_heatmap_np, batch_centermap, batch_weight_np = next(
                generator)
            # print(batch_x_np.shape,batch_gt_heatmap_np.shape, batch_centermap.shape)

            batch_x_np = _prepare_input_images(batch_x_np)
            # (Disabled upstream: heatmaps used to be generated here from
            # joints via cpm_utils.make_heatmaps_from_joints; the generator
            # now supplies batch_gt_heatmap_np directly.)

            # Forward and update weights
            stage_losses_np, total_loss_np, _, summaries, current_lr, \
            stage_heatmap_np, global_step = sess.run([model.stage_loss,
                                                      model.total_loss,
                                                      model.train_op,
                                                      merged_summary,
                                                      model.lr,
                                                      model.stage_heatmap,
                                                      model.global_step
                                                      ],
                                                     feed_dict={model.input_images: batch_x_np,
                                                                model.cmap_placeholder: batch_centermap,
                                                                model.gt_hmap_placeholder: batch_gt_heatmap_np,
                                                                model.train_weights_placeholder: batch_weight_np})

            # Show training info
            print_current_training_stats(global_step, current_lr,
                                         stage_losses_np, total_loss_np,
                                         time.time() - t1)

            # Write logs
            train_writer.add_summary(summaries, global_step)

            # Draw intermediate results
            if FLAGS.if_show and (global_step + 1) % FLAGS.img_show_iters == 0:
                _show_heatmap_preview(batch_x_np, batch_gt_heatmap_np,
                                      stage_heatmap_np)

            if (global_step + 1) % FLAGS.validation_iters == 0:
                mean_val_loss = 0

                for _ in range(num_val_batches):
                    batch_x_np, batch_gt_heatmap_np, batch_centermap, batch_weight_np = next(
                        generator_eval)

                    # Same input scaling as training; validation used to
                    # always normalize, so with normalize_img disabled the
                    # loss was measured on differently scaled inputs.
                    batch_x_np = _prepare_input_images(batch_x_np)

                    total_loss_np, summaries = sess.run(
                        [model.total_loss, merged_summary],
                        feed_dict={
                            model.input_images: batch_x_np,
                            model.cmap_placeholder: batch_centermap,
                            model.gt_hmap_placeholder: batch_gt_heatmap_np,
                            model.train_weights_placeholder: batch_weight_np
                        })
                    mean_val_loss += total_loss_np

                print('\nValidation loss: {:>7.2f}\n'.format(
                    mean_val_loss / num_val_batches))
                # Only the summary of the last validation batch is logged.
                test_writer.add_summary(summaries, global_step)

            # Save models
            if (global_step + 1) % FLAGS.model_save_iters == 0:
                saver.save(sess=sess,
                           save_path=checkpoint_prefix,
                           global_step=(global_step + 1))
                print('\nModel checkpoint saved...\n')

            # Finish training
            if global_step == FLAGS.training_iters:
                saver.save(sess=sess,
                           save_path=checkpoint_prefix,
                           global_step=(global_step + 1))
                print('\nModel checkpoint saved...\n')
                break

        # Flush buffered summary events before the session goes away.
        train_writer.close()
        test_writer.close()
    print('Training done.')
Exemplo n.º 40
0
def run_inference_graph(model, trained_checkpoint_prefix,
                        dataset, num_images, ignore_label, input_shape, pad_to_shape,
                        label_color_map, output_directory, num_classes, eval_dir,
                        min_dir, dist_dir, hist_dir, dump_dir):
    """Evaluate a segmentation model image by image and track OOD metrics.

    Builds the inference graph through ``deploy_segmentation_inference_graph``,
    restores ``trained_checkpoint_prefix``, then for ``num_images // batch``
    steps (batch is fixed to 1): pulls one sample from ``dataset``, perturbs
    the input by ``FLAGS.epsilon`` times the sign of the gradient of the
    selected score, runs the metric update ops and prints the running mIoU
    values and ROC area.  Depending on FLAGS it also trains a kernel, queues
    images / dumps for writing, or opens an interactive threshold viewer.

    Args:
        model: segmentation model; provides the output-dict keys
            ``main_class_predictions_key``, ``final_logits_key`` and
            ``unscaled_logits_key``.
        trained_checkpoint_prefix: checkpoint path given to ``Saver.restore``.
        dataset: dataset that is batched here and read through a one-shot
            iterator; elements are indexed with the ``dataset_builder`` field
            names.
        num_images: number of images to process.
        ignore_label: label value forwarded to ``get_valid`` / ``get_miou``.
        input_shape: rank-3 shape list (asserted); the batch dim is prepended.
        pad_to_shape: forwarded to the graph builder.
        label_color_map: colour table used by ``_map_to_colored_labels``.
        output_directory: target directory for coloured predictions.
        num_classes: number of classes; labels ``>= num_classes`` are counted
            as out-of-distribution.
        eval_dir: not used in this function.
        min_dir: target directory for the normalised min-distance images.
        dist_dir: target directory for the distance-class images.
        hist_dir: only referenced from commented-out code.
        dump_dir: prefix of the ``.npy`` dump directory; ``"_" + epsilon`` is
            appended.

    NOTE(review): several code paths look broken for some flag combinations;
    see the NOTE(review) comments inline.  They are documented only, not
    changed.
    """
    assert len(input_shape) == 3, "input shape must be rank 3"
    batch = 1
    do_ood = FLAGS.do_ood
    epsilon = FLAGS.epsilon
    dump_dir += "_" + str(epsilon)
    # Constants forwarded to pred_to_ood below; their origin is not visible
    # here — presumably dataset statistics, TODO confirm.
    mean_value = 508.7571
    std_value = 77.60572284853058
    if FLAGS.max_softmax:
        thresh = 0.07100591715976332 #dim dist from sun train
        #thresh = 0.0650887573964497 #dim from sun train
    else:
        thresh = 0.37583892617449666 #dim from sun train
    effective_shape = [batch] + input_shape

    # Input pipeline: batches of one, elements that raise errors are skipped.
    dataset = dataset.batch(batch, drop_remainder=True)
    dataset = dataset.apply(tf.data.experimental.ignore_errors())
    data_iter = dataset.make_one_shot_iterator()
    input_dict = data_iter.get_next()

    input_tensor = input_dict[dataset_builder._IMAGE_FIELD]
    annot_tensor = input_dict[dataset_builder._LABEL_FIELD]
    input_name = input_dict[dataset_builder._IMAGE_NAME_FIELD]

    # The model reads its image from a placeholder (not straight from the
    # dataset) so that the perturbed image can be fed back in below.
    annot_pl = tf.placeholder(tf.float32, annot_tensor.get_shape().as_list())
    outputs, placeholder_tensor = deploy_segmentation_inference_graph(
        model=model,
        input_shape=effective_shape,
        #input=input_tensor,
        pad_to_shape=pad_to_shape,
        input_type=tf.float32)

    pred_tensor = outputs[model.main_class_predictions_key]
    final_logits = outputs[model.final_logits_key]
    unscaled_logits = outputs[model.unscaled_logits_key]


    #mean = np.reshape(mean, [-1] + mean_dims)
    #var_inv = np.reshape(var_inv, [-1] + var_dims)
    with tf.device("gpu:1"):
        if not FLAGS.max_softmax:
            # NOTE(review): `mean` and `var_inv` are assigned further down in
            # this function, so Python treats them as locals and this read
            # raises UnboundLocalError.  `depth`, `global_cov` and
            # `global_mean` are not defined in this function — presumably
            # module-level names, verify.
            dist_class, img_dist, full_dist, min_dist, mean_p, var_inv_p, vars_noload, dbg  = process_logits(final_logits, mean, var_inv, depth, pred_tensor.get_shape().as_list(), num_classes, global_cov, global_mean)
            dist_colour = _map_to_colored_labels(dist_class, label_color_map)
            pred_colour = _map_to_colored_labels(pred_tensor, label_color_map)
            selected = min_dist

        if do_ood:
            if FLAGS.max_softmax:
                # OOD score = 1 - max softmax probability of the logits
                # resized to the prediction size and divided by FLAGS.t_value.
                interp_logits = tf.image.resize_bilinear(unscaled_logits, pred_tensor.shape.as_list()[1:3])
                dist_pred = 1.0 - tf.reduce_max(tf.nn.softmax(interp_logits/FLAGS.t_value),-1, keepdims=True)
                dist_class = tf.to_float(dist_pred >= thresh)
                selected = dist_pred
                vars_noload = []
            else:
                #dist_pred = tf.reduce_min(tf.nn.softmax(full_dist), -1, keepdims=True)
                dist_pred = tf.expand_dims(pred_to_ood(min_dist, mean_value, std_value, thresh),-1)
                dist_class = tf.to_float(dist_pred >= thresh)
            
            #pred is the baseline of assuming all ood
            pred_tensor = tf.ones_like(pred_tensor)

    # NOTE(review): `dist_pred` is only bound when do_ood is true, and
    # `selected` / `dist_class` / `vars_noload` are unbound when
    # FLAGS.max_softmax is set without do_ood; the code below uses them
    # unconditionally.
    with tf.device("gpu:1"):
        neg_validity_mask = get_valid(annot_pl, ignore_label)
        with tf.variable_scope("PredIou"):
            (pred_miou, pred_conf_mat, pred_update), _ = get_miou(annot_pl, pred_tensor, num_classes, ignore_label, do_ood, neg_validity_mask)
        with tf.variable_scope("DistIou"):
            (dist_miou, dist_conf_mat, dist_update), _ = get_miou(annot_pl, dist_class, num_classes, ignore_label, do_ood, neg_validity_mask)
  
        weights = tf.to_float(neg_validity_mask)

        num_thresholds = 200

        # Pixels labelled >= num_classes form the positive (OOD) class.
        ood_label = tf.to_float(annot_pl >= num_classes)

        with tf.variable_scope("Roc"):
            RocPoints, roc_update = tf.contrib.metrics.streaming_curve_points(ood_label,dist_pred,weights,num_thresholds,curve='ROC')
        with tf.variable_scope("Pr"):
            PrPoints, pr_update = tf.contrib.metrics.streaming_curve_points(ood_label,dist_pred,weights,num_thresholds,curve='PR')

    # One grouped op that advances all streaming metrics.
    update_op = [pred_update, dist_update, pr_update, roc_update]
    update_op = tf.group(update_op)

    if not FLAGS.max_softmax:
        # Reshape the statistics to the shapes of the tensors they are fed
        # into at initialisation time (see init_feed below).
        mean = np.reshape(mean, mean_p.get_shape().as_list())
        var_inv = np.reshape(var_inv, var_inv_p.get_shape().as_list())

    input_fetch = [input_name, input_tensor, annot_tensor]

    # Tensors fetched on every step; optional entries are added per flag.
    fetch = {"update": update_op,
            "selected": selected,
            "ood_label": ood_label,
        }

    # Replaces the `dbg` returned by process_logits with an empty fetch list.
    dbg = []

    if FLAGS.train_kernel:
        fetch["predictions"] = pred_tensor
        fetch["min_dist_out"] = min_dist[0]

    if FLAGS.write_img:
        fetch["prediction_colour"] = pred_colour
        fetch["dist_out"] = tf.cast(dist_colour[0], tf.uint8)
        fetch["full_dist_out"] = full_dist[0]
        fetch["min_dist_out"] = min_dist[0]

    if FLAGS.write_out:
        fetch["img_dist_out"] = img_dist[0]
        fetch["unscaled_logits_out"] = unscaled_logits[0]

    # Perturb the input against the sign of the gradient of the selected
    # score.  tf.gradients returns a list, so the result presumably carries an
    # extra leading axis; the else branch adds one to match and the loop below
    # strips it with [0].
    grads = tf.gradients(selected, placeholder_tensor)
    if epsilon > 0.0:
        adv_img = placeholder_tensor - epsilon*tf.sign(grads)
    else:
        adv_img = tf.expand_dims(placeholder_tensor, 0)

    num_step = num_images // batch
    print("running for", num_step, "steps")
    #os.makedirs(dump_dir, exist_ok=True)

    if FLAGS.write_out:
        # Writer objects that share one bounded queue of pending outputs.
        write_queue = Queue(30)
        num_writers = 20
        writers = [ParallelWriter(write_queue) for i in range(num_writers)]

    config = tf.ConfigProto(allow_soft_placement=True)
    #config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        init_feed = {}
        if not FLAGS.max_softmax:
            init_feed = {mean_p: mean, var_inv_p: var_inv}
        sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()],init_feed)
        # Restore every global variable except those listed in vars_noload.
        vars_toload = [v for v in tf.global_variables() if v not in vars_noload]
        saver = tf.train.Saver(vars_toload)
        saver.restore(sess, trained_checkpoint_prefix)

        if FLAGS.train_kernel:
            # The kernel model is built after the restore and only the
            # variables in scope "kmodel" are initialised.
            kimg_pl, kedges_pl, kloss, ktrain_step, kfilter = kernel_model((1, 1024, 2048, 1))
            init = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="kmodel")
            sess.run(tf.variables_initializer(init))
            #sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])

        for idx in range(num_step):

            start_time = timeit.default_timer()

            # Pull the next (name, image, annotation) sample from the dataset.
            inputs = sess.run(input_fetch)

            annot_raw = inputs[2]
            img_raw = inputs[1]
            image_path = inputs[0][0].decode("utf-8")
            filename = os.path.basename(image_path)
            dump_filename = os.path.join(dump_dir, filename + ".npy")

            # First pass: compute the perturbed image from the raw input.
            adv_img_out = sess.run(adv_img, feed_dict={placeholder_tensor: img_raw, annot_pl: annot_raw})
            adv_img_out = adv_img_out[0]

            # Second pass: evaluate the perturbed image and update metrics.
            res, dbg_v = sess.run([fetch, dbg], feed_dict={
                            placeholder_tensor: adv_img_out, annot_pl: annot_raw})

            # Running ROC area by trapezoidal integration; the sign flip
            # presumably compensates for descending x values — TODO confirm.
            roc = sess.run(RocPoints)
            auc = -np.trapz(roc[:,1], roc[:,0])

            pred_miou_v, dist_miou_v = sess.run([pred_miou, dist_miou])
            # if auc > 0.1:
            #     import pdb; pdb.set_trace()
            # if idx % 25 == 0 and idx != 0:
            #     roc = sess.run(RocPoints)
            #     plt.plot(roc[:,0], roc[:,1])
            #     plt.show()

            if FLAGS.train_kernel:
                predictions = res["predictions"]
                min_dist_out = res["min_dist_out"]
                # Edge map of the predicted label image.
                edges = cv2.Canny(predictions[0].astype(np.uint8),1,1)
                #import pdb; pdb.set_trace()
                # NOTE(review): `filter` shadows the Python builtin.
                filter = train_kernel(min_dist_out, edges, sess, kimg_pl, kedges_pl, kloss, ktrain_step, kfilter)
                #all_filters.append(filter)
                # kernel = gkern(sigma=0.2)
                dilated = np.expand_dims(cv2.filter2D(edges,-1,filter[...,0,0]),-1).astype(np.float32)
                dilated = dilated/np.max(dilated)
                
                disp = cv2.resize(np.concatenate([to_img(min_dist_out), to_img(dilated)], 1), (int(1920), int(1080)))
                cv2.imshow("test", disp)
                cv2.waitKey(1)

            if FLAGS.write_img:
                # NOTE(review): "predictions" is only added to `fetch` under
                # FLAGS.train_kernel, and `write_queue` only exists under
                # FLAGS.write_out, so this branch depends on those flags too.
                prediction_colour = res["prediction_colour"]
                dist_out = res["dist_out"]
                full_dist_out = res["full_dist_out"]
                predictions = res["predictions"]
                min_dist_out = res["min_dist_out"]

                # annot_out = res[8][0]
                # n_values = np.max(annot_out) + 1
                # one_hot_out = np.eye(n_values)[annot_out][...,0,:num_classes]

                # Replace non-finite distances by the finite minimum, then
                # rescale to 0..255 for writing as an image.  min_dist_v
                # aliases min_dist_out, so the replacement is in place.
                min_dist_v = min_dist_out# np.expand_dims(np.nanmin(full_dist_out, -1), -1)
                min_dist_v[np.logical_not(np.isfinite(min_dist_v))] = np.nanmin(min_dist_out)
                min_dist_v = min_dist_v - np.min(min_dist_v) #min now at 0
                min_dist_v = (255*min_dist_v/np.max(min_dist_v)).astype(np.uint8) #max now at 255
                
                save_location = os.path.join(output_directory, filename)
                dist_filename = os.path.join(dist_dir, filename)
                min_filename = os.path.join(min_dir, filename)

                #write_hist(min_dist_out, "Min Dist", os.path.join(hist_dir, filename))

                #all_mins.append(min_dist_out)

                # if idx == 30:
                #     write_hist(all_mins, "Combined Dists", os.path.join(hist_dir, "all"))

                prediction_colour = prediction_colour.astype(np.uint8)
                output_channels = len(label_color_map[0])
                if output_channels == 1:
                    prediction_colour = np.squeeze(prediction_colour[0],-1)
                else:
                    prediction_colour = prediction_colour[0]
                #import pdb; pdb.set_trace()
                write_queue.put((idx, save_location, prediction_colour))
                write_queue.put((idx, min_filename, min_dist_v))
                write_queue.put((idx, dist_filename, dist_out))
            
            if FLAGS.write_out:
                img_dist_out = res["img_dist_out"]
                unscaled_logits_out = res["unscaled_logits_out"]

                #if not os.path.exists(dump_filename):
                write_queue.put((idx, dump_filename, {"dist": img_dist_out, "unscaled_logits": unscaled_logits_out}))
                #else:
                #    print("skipping", filename, "                          ")
            
            if FLAGS.debug:
                # NOTE(review): `res` is the dict fetched above with string
                # keys, so these integer lookups look like leftovers from a
                # list-based fetch and would raise KeyError.
                dist_out = res[2][0].astype(np.uint8)
                full_dist_out = res[4][0]
                min_dist_out = res[5][0]

                min_dist_v = np.expand_dims(np.nanmin(full_dist_out, -1), -1)
                min_dist_v[np.logical_not(np.isfinite(min_dist_v))] = np.nanmin(full_dist_out)
                min_dist_v = min_dist_v - np.min(min_dist_v) #min now at 0
                min_dist_v = (255*min_dist_v/np.max(min_dist_v)).astype(np.uint8) #max now at 255
                
                final_out = res[7][0]
                annot_out = inputs[2][0]
                img_out = inputs[1][0]
                
                # Interactive threshold viewer: w/s move the threshold by
                # `grain`, a/d change `grain` by 5, p drops into pdb and
                # Escape leaves the loop.
                thresh = np.median(min_dist_out)
                grain = (np.max(min_dist_out) - np.min(min_dist_out))/300
                print(thresh, "  ", grain)
                while True:
                    mask = np.expand_dims(min_dist_out < thresh,-1)
                    #cv2.imshow("img", (255*mask).astype(np.uint8))
                    cv2.imshow("img", (img_out*mask).astype(np.uint8))
                    key = cv2.waitKey(1)
                    if key == 27: #escape
                        break
                    elif key == 115: #s
                        thresh += grain
                        print(thresh, "  ", grain)
                    elif key == 119: #w
                        thresh -= grain
                        print(thresh, "  ", grain)
                    elif key == 97: #a
                        grain -= 5
                        print(thresh, "  ", grain)
                    elif key == 100: #d
                        grain += 5
                        print(thresh, "  ", grain)
                    elif key == 112: #p
                        import pdb; pdb.set_trace()
            
            elapsed = timeit.default_timer() - start_time
            # NOTE(review): `end` and `qsize` are computed but not passed to
            # the print call below.
            end = "\r"
            if idx % 50 == 0:
                #every now and then do regular print
                end = "\n"
            if FLAGS.write_out:
                qsize = write_queue.qsize()
            else:
                qsize = 0
            print('{0:.4f} iter: {1}, pred iou: {2:.6f}, dist iou: {3:.6f}, auc:{4:0.6f}'.format(elapsed, idx+1, pred_miou_v, dist_miou_v, auc))

        if not FLAGS.write_out:
            # Final curves are only plotted when nothing is written to disk.
            roc = sess.run(RocPoints)
            pr = sess.run(PrPoints)
            
            make_plots(roc,pr,num_thresholds)
        
        if FLAGS.write_out:
            for w in writers:
                w.close()
        # NOTE(review): `elapsed`, `idx` and the mIoU values are only bound if
        # the loop ran at least once (num_step > 0).
        print('{0:.4f} iter: {1}, pred iou: {2:.6f}, dist iou: {3:.6f}'.format(elapsed, idx+1, pred_miou_v, dist_miou_v))
Exemplo n.º 41
0
    def __init__(self, params, hidden_weights=None):
        """Build the network graph, open a session and initialize variables.

        Every layer except the last combines three branches: a "hidden"
        branch (input projection plus a recurrent term computed from a
        persistent per-layer state vector), a "mirror" branch (a second tanh
        projection of the same input) and a "readout" of the hidden branch.
        The layer output is ``activation_function(readout * mirror)``.  The
        last layer is a plain dense layer.

        Args:
            params: Mapping read for the keys ``'network_shape'`` (sequence
                of layer widths; first entry is the input width, last entry
                the output width), ``'batch_size'``, ``'tensorboard_dir'``,
                ``'activation_function'`` (callable applied to tensors) and
                ``'optimizer'`` (object exposing ``minimize``).
            hidden_weights: Optional mapping from ``"H_<layer>"`` to an array
                (anything exposing ``astype``) used as the fixed recurrent
                matrix ``H`` of that layer.  Layers without an entry -- or
                all layers when this is ``None`` -- get a random,
                non-trainable ``H``.  When a mapping is given, the global
                scale ``H_tune`` becomes trainable.
        """
        self.params = params
        self.network_shape = self.params['network_shape']
        self.input_dim = self.network_shape[0]
        self.output_dim = self.network_shape[-1]
        self.batch_size = self.params['batch_size']
        self.hidden_weights = hidden_weights
        self.tensorboard_dir = self.params['tensorboard_dir']

        self.activation_function = self.params['activation_function']
        self.optimizer_ = self.params['optimizer']

        # model
        self.input = tf.placeholder(tf.float32, [None, self.input_dim],
                                    name="input")
        self.output = tf.placeholder(tf.float32, [None, self.output_dim],
                                     name="output")
        self.activation_patterns = {}
        self.hidden_state_activation_patterns = {}
        self.activation = self.input
        self.hidden_states = {}
        self.hidden_states_update_ops = {}

        # One persistent, non-trainable state vector per hidden layer.
        for i in range(1, len(self.network_shape) - 1):
            with tf.name_scope("layer{0}".format(i)):
                h = tf.Variable(tf.truncated_normal([self.network_shape[i]]),
                                name="hidden_state",
                                trainable=False)
                self.hidden_states["hs_{0}".format(i)] = h
                Utils.variable_summaries(
                    self.hidden_states["hs_{0}".format(i)], "hs_{0}".format(i))

        # H_tune scales every recurrent matrix H and is trained only when
        # hidden weights were supplied.  It must be a float in both cases:
        # an integer variable cannot be combined with the float32 H matrices
        # in tf.scalar_mul below.
        tune_is_trainable = self.hidden_weights is not None
        H_tune = tf.Variable(1.0, trainable=tune_is_trainable, name="H_tune")
        if tune_is_trainable:
            Utils.variable_summaries(H_tune, "H_tune")

        for i in range(len(self.network_shape) - 1):
            with tf.name_scope("layer{0}".format(i + 1)):
                if i < len(self.network_shape) - 2:
                    with tf.name_scope("hidden"):

                        # input weight and bias
                        W = tf.Variable(tf.random_normal(
                            [self.network_shape[i], self.network_shape[i + 1]],
                            stddev=0.05),
                                        name="W")
                        bW = tf.Variable(tf.random_normal(
                            [self.network_shape[i + 1]], stddev=0.05),
                                         name="bW")
                        Utils.variable_summaries(W, "W")
                        Utils.variable_summaries(bW, "bW")

                        # Recurrent matrix: caller-supplied if available,
                        # random otherwise; never trained either way.
                        H_name = "H_{0}".format(i + 1)
                        if (self.hidden_weights is not None
                                and H_name in self.hidden_weights):
                            H = tf.Variable(
                                self.hidden_weights[H_name].astype('float32'),
                                dtype=tf.float32,
                                trainable=False,
                                name="H")
                        else:
                            H = tf.Variable(tf.random_normal([
                                self.network_shape[i + 1],
                                self.network_shape[i + 1]
                            ],
                                                             stddev=0.05),
                                            trainable=False,
                                            name="H")

                        input_for_hidden = tf.matmul(self.activation, W) + bW
                        # Repeat the state vector batch_size times so it can
                        # be added row-wise to the projected input.
                        tiled_h = tf.reshape(
                            tf.tile(self.hidden_states["hs_{0}".format(i + 1)],
                                    [self.batch_size]), [self.batch_size, -1])
                        hidden_update = tf.nn.tanh(
                            tf.add(
                                input_for_hidden,
                                tf.matmul(tiled_h, tf.scalar_mul(H_tune, H))))

                    with tf.name_scope("mirror"):
                        # mirror input and bias
                        M = tf.Variable(tf.random_normal(
                            [self.network_shape[i], self.network_shape[i + 1]],
                            stddev=0.05),
                                        name="M")
                        bM = tf.Variable(tf.random_normal(
                            [self.network_shape[i + 1]], stddev=0.05),
                                         name="bM")
                        Utils.variable_summaries(M, "M")
                        Utils.variable_summaries(bM, "bM")
                        input_for_mirror = tf.nn.tanh(
                            tf.matmul(self.activation, M) + bM)

                    with tf.name_scope("readout"):
                        # readout weights and biases
                        R = tf.Variable(tf.random_normal([
                            self.network_shape[i + 1],
                            self.network_shape[i + 1]
                        ],
                                                         stddev=0.05),
                                        name="R")
                        bR = tf.Variable(tf.random_normal(
                            [self.network_shape[i + 1]], stddev=0.05),
                                         name="bR")
                        Utils.variable_summaries(R, "R")
                        Utils.variable_summaries(bR, "bR")
                        readout = self.activation_function(
                            tf.matmul(hidden_update, R) + bR)

                    with tf.name_scope("activation"):
                        self.activation = self.activation_function(
                            tf.multiply(readout, input_for_mirror))

                    # Op that overwrites the stored state with the first row
                    # of this batch's hidden update; it only runs when a
                    # caller fetches it explicitly.
                    self.hidden_states_update_ops["hs_{0}".format(
                        i + 1)] = self.hidden_states["hs_{0}".format(
                            i + 1)].assign(hidden_update[0])
                else:
                    # Output layer: plain dense layer.
                    W = tf.Variable(tf.random_normal(
                        [self.network_shape[i], self.network_shape[i + 1]],
                        stddev=0.05),
                                    name="W")
                    bW = tf.Variable(tf.random_normal(
                        [self.network_shape[i + 1]], stddev=0.05),
                                     name="bW")
                    Utils.variable_summaries(W, "W")
                    Utils.variable_summaries(bW, "bW")
                    with tf.name_scope("activation"):
                        self.activation = self.activation_function(
                            tf.matmul(self.activation, W) + bW)
                act = self.activation
                # The tensor produced at loop index i is stored under
                # 'layer_<i>'; the one produced at i == 0 is not recorded.
                if i > 0:
                    self.activation_patterns['layer_{0}'.format(i)] = act

        # cost
        with tf.name_scope("cost"):
            # Keyword arguments are mandatory on TF 1.x (positional calls are
            # rejected) and are also accepted by earlier releases.
            self.cost = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(
                    labels=self.output, logits=self.activation))
            tf.summary.scalar('cost', self.cost)

        with tf.name_scope("accuracy"):
            correct_prediction = tf.equal(tf.argmax(self.activation, 1),
                                          tf.argmax(self.output, 1))
            self.accuracy = tf.reduce_mean(
                tf.cast(correct_prediction, tf.float32))
            tf.summary.scalar('accuracy', self.accuracy)

        self.optimizer = self.optimizer_.minimize(self.cost)

        self.sess = tf.Session()

        self.merged = tf.summary.merge_all()
        self.summ_writer = tf.summary.FileWriter(self.tensorboard_dir,
                                                 self.sess.graph)

        init = tf.global_variables_initializer()

        self.sess.run(init)
Exemplo n.º 42
0
# Build the training op.  Everything registered in the UPDATE_OPS collection
# is made a control dependency of the ops created inside the `with` block.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    # train_op = optimizer.minimize(loss[0] + l2_loss, var_list=update_vars, global_step=global_step)
    # apply gradient clip to avoid gradient exploding
    gvs = optimizer.compute_gradients(loss[0] + l2_loss, var_list=update_vars)
    # Pairs whose gradient is None are passed through unchanged; every other
    # gradient is clipped to an L2 norm of at most 100.
    clip_grad_var = [gv if gv[0] is None else [
          tf.clip_by_norm(gv[0], 100.), gv[1]] for gv in gvs]
    train_op = optimizer.apply_gradients(clip_grad_var, global_step=global_step)

# NOTE(review): `saver_to_save` and `saver_best` are only created here when
# args.save_optimizer is truthy -- confirm they are defined elsewhere for the
# other case before they are used.
if args.save_optimizer:
    print('Saving optimizer parameters to checkpoint! Remember to restore the global_step in the fine-tuning afterwards.')
    saver_to_save = tf.train.Saver()
    saver_best = tf.train.Saver()

with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
    saver_to_restore.restore(sess, args.restore_path)
    merged = tf.summary.merge_all()
    writer = tf.summary.FileWriter(args.log_dir, sess.graph)

    print('\n----------- start to train -----------\n')

    best_mAP = -np.Inf

    for epoch in range(args.total_epoches):

        sess.run(train_init_op)
        loss_total, loss_xy, loss_wh, loss_conf, loss_class = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()

        for i in trange(args.train_batch_num):
            _, summary, __y_pred, __y_true, __loss, __global_step, __lr = sess.run(
Exemplo n.º 43
0
def train(args, sess, model):
    """Run the staged training schedule for ``model`` inside ``sess``.

    The schedule is selected by the current epoch counter:
      * ``epoch < args.Tc``: minimize ``model.recon_loss``;
      * ``epoch < args.Tc + args.Td``: minimize ``model.d_loss``;
      * otherwise: minimize ``model.loss_all``.

    An "epoch" ends once ``step * args.batch_size`` reaches
    ``model.data_count``; a checkpoint is written at that point.  The
    session is closed before returning.

    Args:
        args: Options object; reads ``learning_rate``, ``momentum``,
            ``continue_training``, ``checkpoints_path``, ``graph_path``,
            ``train_step``, ``Tc``, ``Td`` and ``batch_size``.
        sess: Session used for every run/restore/save call.  The variable
            initializers are executed through ``.run()``, i.e. against the
            default session.
        model: Object exposing the losses, variable lists, summaries and
            ``data_count`` referenced below.
    """

    def build_minimizer(op_name, objective, variables):
        # Adam optimizers are used instead of AdaDelta.
        adam = tf.train.AdamOptimizer(args.learning_rate,
                                      beta1=args.momentum,
                                      name=op_name)
        return adam.minimize(objective, var_list=variables)

    # Creation order is kept (D, C, global) so op names resolve as before.
    d_optimizer = build_minimizer("AdamOptimizer_D", model.d_loss,
                                  model.d_vars)
    c_optimizer = build_minimizer("AdamOptimizer_C", model.recon_loss,
                                  model.c_vars)
    global_optimizer = build_minimizer("AdamOptimizer_C", model.loss_all,
                                       model.c_vars)

    epoch, step, global_step = 0, 0, 0

    # Either start from scratch or resume from the newest checkpoint; the
    # resumed epoch number is parsed from the checkpoint name suffix.
    saver = tf.train.Saver()
    if not args.continue_training:
        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()
    else:
        tf.local_variables_initializer().run()
        last_ckpt = tf.train.latest_checkpoint(args.checkpoints_path)
        saver.restore(sess, last_ckpt)
        ckpt_name = str(last_ckpt)
        print("Loaded model file from " + ckpt_name)
        epoch = int(ckpt_name.split('-')[-1])

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    # Summaries written on every iteration.
    summary_parts = [
        model.recon_loss_sum, model.d_loss_sum, model.loss_all_sum,
        model.input_img_sum, model.real_img_sum, model.recon_img_sum,
        model.g_local_imgs_sum, model.r_local_imgs_sum
    ]
    all_summary = tf.summary.merge(summary_parts)
    writer = tf.summary.FileWriter(args.graph_path, sess.graph)

    while epoch < args.train_step:
        # Pick the loss, train op and log line for the current stage.
        if epoch < args.Tc:
            # Stage 1: completion network
            stage_loss = model.recon_loss
            stage_op = c_optimizer
            line_fmt = "Epoch [%d] Step [%d] C Loss: [%.4f]"
        elif epoch < args.Tc + args.Td:
            # Stage 2: discriminator network
            stage_loss = model.d_loss
            stage_op = d_optimizer
            line_fmt = "Epoch [%d] Step [%d] D Loss: [%.4f]"
        else:
            # Stage 3: completion network on the combined loss
            stage_loss = model.loss_all
            stage_op = global_optimizer
            line_fmt = "Epoch [%d] Step [%d] C Loss: [%.4f]"

        summary, loss_value, _ = sess.run([all_summary, stage_loss, stage_op])
        writer.add_summary(summary, global_step)
        print(line_fmt % (epoch, step, loss_value))

        # End of epoch: checkpoint and restart the per-epoch step counter.
        epoch_finished = step * args.batch_size >= model.data_count
        if epoch_finished:
            checkpoint_prefix = args.checkpoints_path + "/model"
            saver.save(sess, checkpoint_prefix, global_step=epoch)
            step = 0
            epoch += 1

        step += 1
        global_step += 1

    coord.request_stop()
    coord.join(threads)
    sess.close()
    print("Done.")
Exemplo n.º 44
0
def main():
    """
    Main program

    Reads the feature/label rows for the configured time range, optionally
    normalizes them and runs PCA, splits them into train and test parts,
    trains the LSTM batch by batch while evaluating on the test part after
    every step, and finally stores the model, the label scaler, a learning
    curve plot and a CSV with the per-step validation errors.

    All configuration is read from the module-level ``options`` object.
    With ``options.cv`` set, a randomized hyper-parameter search is run on
    the first batch instead and the process exits.
    """
    local_device_protos = device_lib.list_local_devices()
    logging.info(
        [x.name for x in local_device_protos if x.device_type == 'GPU'])

    bq = _bq.BQHandler()
    io = _io.IO(gs_bucket=options.gs_bucket)
    viz = _viz.Viz()

    starttime, endtime = io.get_dates(options)

    logging.info('Using dataset {} and time range {} - {}'.format(
        options.feature_dataset, starttime.strftime('%Y-%m-%d'),
        endtime.strftime('%Y-%m-%d')))

    all_param_names = options.label_params + options.feature_params + options.meta_params
    aggs = io.get_aggs_from_param_names(options.feature_params)

    logging.info('Reading data...')
    bq.set_params(starttime,
                  endtime,
                  batch_size=2500000,
                  loc_col='trainstation',
                  project=options.project,
                  dataset=options.feature_dataset,
                  table=options.feature_table,
                  parameters=all_param_names,
                  only_winters=options.only_winters)

    data = bq.get_rows()

    data = io.filter_train_type(labels_df=data,
                                train_types=options.train_types,
                                sum_types=True,
                                train_type_column='train_type',
                                location_column='trainstation',
                                time_column='time',
                                sum_columns=['train_count', 'delay'],
                                aggs=aggs)

    if options.y_avg_hours is not None:
        data = io.calc_running_delay_avg(data, options.y_avg_hours)

    if options.y_avg:
        data = io.calc_delay_avg(data)

    data.sort_values(by=['time', 'trainstation'], inplace=True)

    if options.normalize:
        logging.info('Normalizing data...')
        xscaler = StandardScaler()
        yscaler = StandardScaler()

        non_scaled_data = data.loc[:, options.meta_params]
        labels = data.loc[:, options.label_params].astype(
            np.float32).values.reshape((-1, 1))

        # The scalers return plain arrays in the current (sorted) row order.
        # The frames built from them must carry data's own index: sorting
        # above permuted the index labels, and pd.concat(axis=1) aligns on
        # labels, so a fresh RangeIndex would pair meta columns with the
        # features/labels of different rows.
        row_index = data.index

        yscaler.fit(labels)
        scaled_labels = pd.DataFrame(yscaler.transform(labels),
                                     columns=['delay'],
                                     index=row_index)
        scaled_features = pd.DataFrame(xscaler.fit_transform(
            data.loc[:, options.feature_params].astype(np.float32)),
                                       columns=options.feature_params,
                                       index=row_index)

        data = pd.concat([non_scaled_data, scaled_features, scaled_labels],
                         axis=1)

    if options.pca:
        logging.info('Doing PCA analyzis for the data...')
        ipca = IncrementalPCA(n_components=options.pca_components,
                              whiten=options.whiten,
                              copy=False)

        non_processed_data = data.loc[:, options.meta_params +
                                      options.label_params]
        processed_data = data.loc[:, options.feature_params]
        ipca.fit(processed_data)
        # NOTE(review): the transformed components are computed but never
        # used -- the frame concatenated below is the PCA *input*.  Using
        # `processed_features` instead would also require changing the
        # feature column selection further down (it selects by
        # options.feature_params), so the behavior is left as is; confirm
        # the intent.
        processed_features = pd.DataFrame(ipca.transform(processed_data))

        data = pd.concat([non_processed_data, processed_data], axis=1)

        fname = options.output_path + '/ipca_explained_variance.png'
        viz.explained_variance(ipca, fname)
        io._upload_to_bucket(filename=fname, ext_filename=fname)

    data_train, data_test = train_test_split(data, test_size=0.33)
    X_test, y_test = io.extract_batch(data_test,
                                      options.time_steps,
                                      batch_size=None,
                                      pad_strategy=options.pad_strategy,
                                      quantile=options.quantile,
                                      label_params=options.label_params,
                                      feature_params=options.feature_params)

    # Define model
    batch_size = io.get_batch_size(data_train,
                                   options.pad_strategy,
                                   quantile=options.quantile)
    logging.info('Batch size: {}'.format(batch_size))
    model = LSTM.LSTM(options.time_steps,
                      len(options.feature_params),
                      1,
                      options.n_hidden,
                      options.lr,
                      options.p_drop,
                      batch_size=batch_size)

    # Initialization
    rmses, mses, maes, steps, train_mse = [], [], [], [], []
    saver = tf.train.Saver()
    sess = tf.Session()
    init = tf.global_variables_initializer()
    sess.run(init)
    summary_writer = tf.summary.FileWriter(options.log_dir,
                                           graph=tf.get_default_graph())

    # The validation summaries are merged *before* the training summary is
    # created, so `merged_summary_op` contains the validation ops only and
    # the training MSE is written through its own op.
    tf.summary.scalar('Validation_MSE', model.mse)
    tf.summary.scalar('Validation_RMSE', model.rmse)
    tf.summary.scalar('Validation_MAE', model.mae)
    tf.summary.histogram('y_pred_hist', model.y_pred)
    merged_summary_op = tf.summary.merge_all()
    train_summary_op = tf.summary.scalar('Training_MSE', model.loss)

    train_step = 0
    start = 0
    while True:
        # If slow is set, go forward one time step at time,
        # else proceed whole batch size
        if options.slow:
            X_train, y_train = io.extract_batch(
                data_train,
                options.time_steps,
                start=start,
                pad_strategy=options.pad_strategy,
                quantile=options.quantile,
                label_params=options.label_params,
                feature_params=options.feature_params)
        else:
            X_train, y_train = io.extract_batch(
                data_train,
                options.time_steps,
                train_step,
                pad_strategy=options.pad_strategy,
                quantile=options.quantile,
                label_params=options.label_params,
                feature_params=options.feature_params)

        # Stop once the extracted batch is shorter than one time window.
        if len(X_train) < options.time_steps:
            break

        if options.cv:
            logging.info('Doing random search for hyper parameters...')

            param_grid = {
                "C": [0.001, 0.01, 0.1, 1, 10],
                "epsilon": [0.01, 0.1, 0.5],
                "kernel": ['rbf', 'linear', 'poly', 'sigmoid', 'precomputed'],
                "degree": [2, 3, 4],
                "shrinking": [True, False],
                "gamma": [0.001, 0.01, 0.1],
                "coef0": [0, 0.1, 1]
            }

            random_search = RandomizedSearchCV(model,
                                               param_distributions=param_grid,
                                               n_iter=int(
                                                   options.n_iter_search),
                                               n_jobs=-1)

            random_search.fit(X_train, y_train)
            logging.info("RandomizedSearchCV done.")
            fname = options.output_path + '/random_search_cv_results.txt'
            report_cv_results(random_search.cv_results_, fname)
            io._upload_to_bucket(filename=fname, ext_filename=fname)
            # The search replaces training entirely; nothing below runs.
            sys.exit()
        else:
            if train_step == 0:
                logging.info('Training...')

            feed_dict = {model.X: X_train, model.y: y_train}
            _, loss, train_summary = sess.run(
                [model.train_op, model.loss, train_summary_op],
                feed_dict=feed_dict)

            summary_writer.add_summary(train_summary, train_step * batch_size)

        # Metrics on the held-out part, evaluated after every training step.
        feed_dict = {model.X: X_test, model.y: y_test}

        val_loss, rmse, mse, mae, y_pred, summary = sess.run(
            [
                model.loss, model.rmse, model.mse, model.mae, model.y_pred,
                merged_summary_op
            ],
            feed_dict=feed_dict)

        train_mse.append(loss)
        mses.append(mse)
        rmses.append(rmse)
        maes.append(mae)
        steps.append(train_step)

        summary_writer.add_summary(summary, train_step * batch_size)
        if train_step % 50 == 0:
            logging.info("Step {}:".format(train_step))
            logging.info("Training loss: {:.4f}".format(loss))
            logging.info("Validation MSE: {:.4f}".format(val_loss))
            logging.info('Validation RMSE: {}'.format(rmse))
            logging.info('Validation MAE: {}'.format(mae))
            logging.info('................')
            saver.save(sess, options.save_file)

        train_step += 1
        start += 1
        # <-- while True:

    saver.save(sess, options.save_file)
    if options.normalize:
        fname = options.save_path + '/yscaler.pkl'
        io.save_scikit_model(yscaler, fname, fname)
    io._upload_dir_to_bucket(options.save_path, options.save_path)

    # Plotting is best effort: a failure is logged and the CSV below is
    # still written.
    try:
        fname = options.output_path + '/learning_over_time.png'
        metrics = [{
            'metrics': [{
                'values': mses,
                'label': 'Validation MSE'
            }, {
                'values': train_mse,
                'label': 'Train MSE'
            }],
            'y_label':
            'MSE'
        }, {
            'metrics': [{
                'values': rmses,
                'label': 'Validation RMSE'
            }],
            'y_label': 'RMSE'
        }, {
            'metrics': [{
                'values': maes,
                'label': 'Validation MAE'
            }],
            'y_label': 'MAE'
        }]
        viz.plot_learning(metrics, fname)
        io._upload_to_bucket(filename=fname, ext_filename=fname)
    except Exception as e:
        logging.error(e)

    error_data = {
        'steps': steps,
        'mse': mses,
        'rmse': rmses,
        'mae': maes,
        'train_mse': train_mse
    }
    fname = '{}/training_time_validation_errors.csv'.format(
        options.output_path)
    io.write_csv(error_data, filename=fname, ext_filename=fname)
def train(train_data, test_data=None):

    G = train_data[0]
    features = train_data[1]
    id_map = train_data[2]
    class_map  = train_data[4]
    if isinstance(list(class_map.values())[0], list):
        num_classes = len(list(class_map.values())[0])
    else:
        num_classes = len(set(class_map.values()))

    if not features is None:
        # pad with dummy zero vector
        features = np.vstack([features, np.zeros((features.shape[1],))])

    context_pairs = train_data[3] if FLAGS.random_context else None
    placeholders = construct_placeholders(num_classes)
    minibatch = NodeMinibatchIterator(G, 
            id_map,
            placeholders, 
            class_map,
            num_classes,
            batch_size=FLAGS.batch_size,
            max_degree=FLAGS.max_degree, 
            context_pairs = context_pairs)
    adj_info_ph = tf.placeholder(tf.int32, shape=minibatch.adj.shape)
    adj_info = tf.Variable(adj_info_ph, trainable=False, name="adj_info")

    if FLAGS.model == 'graphsage_mean':
        # Create model
        sampler = UniformNeighborSampler(adj_info)
        if FLAGS.samples_3 != 0:
            layer_infos = [SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1),
                                SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2),
                                SAGEInfo("node", sampler, FLAGS.samples_3, FLAGS.dim_2)]
        elif FLAGS.samples_2 != 0:
            layer_infos = [SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1),
                                SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2)]
        else:
            layer_infos = [SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1)]

        model = SupervisedGraphsage(num_classes, placeholders, 
                                     features,
                                     adj_info,
                                     minibatch.deg,
                                     layer_infos, 
                                     model_size=FLAGS.model_size,
                                     sigmoid_loss = FLAGS.sigmoid,
                                     identity_dim = FLAGS.identity_dim,
                                     logging=True)
    elif FLAGS.model == 'gcn':
        # Create model
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [SAGEInfo("node", sampler, FLAGS.samples_1, 2*FLAGS.dim_1),
                            SAGEInfo("node", sampler, FLAGS.samples_2, 2*FLAGS.dim_2)]

        model = SupervisedGraphsage(num_classes, placeholders, 
                                     features,
                                     adj_info,
                                     minibatch.deg,
                                     layer_infos=layer_infos, 
                                     aggregator_type="gcn",
                                     model_size=FLAGS.model_size,
                                     concat=False,
                                     sigmoid_loss = FLAGS.sigmoid,
                                     identity_dim = FLAGS.identity_dim,
                                     logging=True)

    elif FLAGS.model == 'graphsage_seq':
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1),
                            SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2)]

        model = SupervisedGraphsage(num_classes, placeholders, 
                                     features,
                                     adj_info,
                                     minibatch.deg,
                                     layer_infos=layer_infos, 
                                     aggregator_type="seq",
                                     model_size=FLAGS.model_size,
                                     sigmoid_loss = FLAGS.sigmoid,
                                     identity_dim = FLAGS.identity_dim,
                                     logging=True)

    elif FLAGS.model == 'graphsage_maxpool':
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1),
                            SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2)]

        model = SupervisedGraphsage(num_classes, placeholders, 
                                    features,
                                    adj_info,
                                    minibatch.deg,
                                     layer_infos=layer_infos, 
                                     aggregator_type="maxpool",
                                     model_size=FLAGS.model_size,
                                     sigmoid_loss = FLAGS.sigmoid,
                                     identity_dim = FLAGS.identity_dim,
                                     logging=True)

    elif FLAGS.model == 'graphsage_meanpool':
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1),
                            SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2)]

        model = SupervisedGraphsage(num_classes, placeholders, 
                                    features,
                                    adj_info,
                                    minibatch.deg,
                                     layer_infos=layer_infos, 
                                     aggregator_type="meanpool",
                                     model_size=FLAGS.model_size,
                                     sigmoid_loss = FLAGS.sigmoid,
                                     identity_dim = FLAGS.identity_dim,
                                     logging=True)

    else:
        raise Exception('Error: model name unrecognized.')

    config = tf.ConfigProto(log_device_placement=FLAGS.log_device_placement)
    config.gpu_options.allow_growth = True
    #config.gpu_options.per_process_gpu_memory_fraction = GPU_MEM_FRACTION
    config.allow_soft_placement = True
    
    # Initialize session
    sess = tf.Session(config=config)
    merged = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(log_dir(), sess.graph)
     
    # Init variables
    sess.run(tf.global_variables_initializer(), feed_dict={adj_info_ph: minibatch.adj})
    
    # Train model
    
    total_steps = 0
    avg_time = 0.0
    epoch_val_costs = []

    train_adj_info = tf.assign(adj_info, minibatch.adj)
    val_adj_info = tf.assign(adj_info, minibatch.test_adj)
    for epoch in range(FLAGS.epochs): 
        minibatch.shuffle() 

        iter = 0
        print('Epoch: %04d' % (epoch + 1))
        epoch_val_costs.append(0)
        while not minibatch.end():
            # Construct feed dictionary
            feed_dict, labels = minibatch.next_minibatch_feed_dict()
            feed_dict.update({placeholders['dropout']: FLAGS.dropout})

            t = time.time()
            # Training step
            outs = sess.run([merged, model.opt_op, model.loss,model.outputs1,model.preds], feed_dict=feed_dict)
            train_cost = outs[2]

            if iter % FLAGS.validate_iter == 0:
                # Validation
                sess.run(val_adj_info.op)
                if FLAGS.validate_batch_size == -1:
                    val_cost, val_f1_mic, val_f1_mac, duration = incremental_evaluate(sess, model, minibatch, FLAGS.batch_size)
                else:
                    val_cost, val_f1_mic, val_f1_mac, duration = evaluate(sess, model, minibatch, FLAGS.validate_batch_size)
                sess.run(train_adj_info.op)
                epoch_val_costs[-1] += val_cost

            if total_steps % FLAGS.print_every == 0:
                summary_writer.add_summary(outs[0], total_steps)
    
            # Print results
            avg_time = (avg_time * total_steps + time.time() - t) / (total_steps + 1)

            if total_steps % FLAGS.print_every == 0:
                train_f1_mic, train_f1_mac = calc_f1(labels, outs[-1])
                print("Iter:", '%04d' % iter, 
                      "train_loss=", "{:.5f}".format(train_cost),
                      "train_f1_mic=", "{:.5f}".format(train_f1_mic), 
                      "train_f1_mac=", "{:.5f}".format(train_f1_mac), 
                      "val_loss=", "{:.5f}".format(val_cost),
                      "val_f1_mic=", "{:.5f}".format(val_f1_mic), 
                      "val_f1_mac=", "{:.5f}".format(val_f1_mac), 
                      "time=", "{:.5f}".format(avg_time))
 
            iter += 1
            total_steps += 1

            if total_steps > FLAGS.max_total_steps:
                break

        if total_steps > FLAGS.max_total_steps:
                break
    
    print("Optimization Finished!")
    if FLAGS.save_embeddings:


      sess.run(val_adj_info.op)
      save_val_embeddings(sess, model, minibatch, FLAGS.validate_batch_size, log_dir())
      
      val_cost, val_f1_mic, val_f1_mac, duration = incremental_evaluate(sess, model, minibatch, FLAGS.batch_size)
      print("Full validation stats:",
                    "loss=", "{:.5f}".format(val_cost),
                    "f1_micro=", "{:.5f}".format(val_f1_mic),
                    "f1_macro=", "{:.5f}".format(val_f1_mac),
                    "time=", "{:.5f}".format(duration))
      with open(log_dir() + "val_stats.txt", "w") as fp:
          fp.write("loss={:.5f} f1_micro={:.5f} f1_macro={:.5f} time={:.5f}".
                  format(val_cost, val_f1_mic, val_f1_mac, duration))

      print("Writing test set stats to file (don't peak!)")
      val_cost, val_f1_mic, val_f1_mac, duration = incremental_evaluate(sess, model, minibatch, FLAGS.batch_size, test=True)
      with open(log_dir() + "test_stats.txt", "w") as fp:
          fp.write("loss={:.5f} f1_micro={:.5f} f1_macro={:.5f}".
                  format(val_cost, val_f1_mic, val_f1_mac))
    def train(self, para_dict):
        """Train the model, saving a checkpoint, a frozen graph and a log every epoch.

        Args:
            para_dict: dict providing
                'epochs'     - number of passes over the training set,
                'GPU_ratio'  - value for per_process_gpu_memory_fraction, or
                               None to let GPU memory grow on demand,
                'batch_size' - number of samples fed per session run,
                'ratio'      - when <= 1.0, the leading fraction of the
                               training set to keep; self.train_paths and
                               self.train_labels are truncated accordingly.

        Side effects:
            Reassigns self.train_paths / self.train_labels (truncation and a
            fresh permutation every epoch), updates self.content, writes
            checkpoints through self.saver, overwrites the frozen graph at
            self.pb_save_path and rewrites the JSON log at self.log_path.
        """
        #----var parsing
        epochs = para_dict['epochs']
        GPU_ratio = para_dict['GPU_ratio']
        batch_size = para_dict['batch_size']
        ratio = para_dict['ratio']

        #----local var
        train_loss_list = list()
        train_acc_list = list()
        test_acc_list = list()
        epoch_time_list = list()

        self.content = self.log_update(self.content, para_dict)

        #----ratio
        if ratio <= 1.0:
            img_quantity = int(self.train_paths.shape[0] * ratio)
            self.train_paths = self.train_paths[:img_quantity]
            self.train_labels = self.train_labels[:img_quantity]
            print("img_quantity:",img_quantity)
        else:
            img_quantity = self.train_paths.shape[0]

        #----calculate iterations of one epoch
        train_ites = math.ceil(self.train_paths.shape[0] / batch_size)

        #----GPU setting
        config = tf.ConfigProto(log_device_placement=True,
                                allow_soft_placement=True)
        if GPU_ratio is None:
            config.gpu_options.allow_growth = True
        else:
            config.gpu_options.per_process_gpu_memory_fraction = GPU_ratio

        with tf.Session(config=config) as sess:
            #----transfer learning check
            meta_paths = [entry.path for entry in os.scandir(self.save_dir)
                          if entry.name.split(".")[-1] == 'meta']
            if len(meta_paths) == 0:
                sess.run(tf.global_variables_initializer())
                print("no previous model param can be used!")
            else:
                # os.scandir yields entries in arbitrary order, so pick the
                # most recently written checkpoint explicitly.
                newest_meta = max(meta_paths, key=os.path.getmtime)
                # Strip directory and the ".meta" suffix portably; splitting
                # on "\\" and on the first "." only worked for Windows paths
                # without any other dot in them.
                check_name = os.path.splitext(os.path.basename(newest_meta))[0]
                model_path = os.path.join(self.save_dir, check_name)
                self.saver.restore(sess, model_path)
                msg = "use previous model param:{}".format(model_path)
                print(msg)

            print("img_quantity:", img_quantity)
            #----epoch training
            for epoch in range(epochs):
                #----record the start time
                d_t = time.time()

                train_loss = 0
                train_acc = 0
                test_acc = 0

                #----shuffle
                indice = np.random.permutation(self.train_paths.shape[0])
                self.train_paths = self.train_paths[indice]
                self.train_labels = self.train_labels[indice]

                #----do optimizers(training by iteration)
                for index in range(train_ites):
                    #----get image start and end numbers
                    num_start = index * batch_size
                    num_end = np.minimum(num_start + batch_size, self.train_paths.shape[0])

                    batch_data = self.get_4D_data(self.train_paths[num_start:num_end],self.model_shape[1:])

                    #----put all data to tf placeholders
                    feed_dict = {self.tf_input:batch_data,
                                 self.tf_label_batch:self.train_labels[num_start:num_end],
                                 self.tf_keep_prob:0.8,
                                 self.tf_phase_train:True}

                    #----session run
                    sess.run(self.optimizer,feed_dict=feed_dict)

                #----evaluation(training set), dropout disabled
                for index in range(train_ites):
                    #----get image start and end numbers
                    num_start = index * batch_size
                    num_end = np.minimum(num_start + batch_size, self.train_paths.shape[0])

                    batch_data = self.get_4D_data(self.train_paths[num_start:num_end],self.model_shape[1:])

                    #----put all data to tf placeholders
                    feed_dict = {self.tf_input:batch_data,
                                 self.tf_label_batch:self.train_labels[num_start:num_end],
                                 self.tf_keep_prob:1.0,
                                 self.tf_phase_train:False}

                    #----session run
                    loss_temp, predict_temp = sess.run([self.loss,self.prediction],feed_dict=feed_dict)

                    #----calculate the loss and accuracy
                    train_loss += loss_temp
                    train_acc += self.evaluation(predict_temp,self.train_labels[num_start:num_end])

                # Loss is averaged over batches; the accuracy sum is divided
                # by the sample count (presumably self.evaluation returns the
                # number of correct predictions - confirm against its code).
                train_loss /= train_ites
                train_acc /= self.train_paths.shape[0]

                #-----testing set(LFW) evaluation
                if self.test_img_dir is not None:
                    # The feed_dict of the last evaluation batch is handed
                    # over to eval_on_lfw as-is.
                    test_acc = self.eval_on_lfw(sess, feed_dict, self.test_img_dir, batch_size=batch_size)

                #----save ckpt, pb files
                model_save_path = self.saver.save(sess,self.out_dir_prefix,global_step=epoch)
                print("save model CKPT to ",model_save_path)

                graph = tf.get_default_graph().as_graph_def()
                output_graph_def = tf.graph_util.convert_variables_to_constants(sess,graph,self.pb_save_list)
                with tf.gfile.GFile(self.pb_save_path,'wb')as f:
                    f.write(output_graph_def.SerializeToString())
                print("save PB file to ",self.pb_save_path)

                #----record the end time
                d_t = time.time() - d_t

                #----save results in the log file
                train_loss_list.append(float(train_loss))
                train_acc_list.append(float(train_acc))
                if self.test_img_dir is not None:
                    test_acc_list.append(float(test_acc))

                self.content["train_loss_list"] = train_loss_list
                self.content["train_acc_list"] = train_acc_list
                if self.test_img_dir is not None:
                    self.content["test_acc_list"] = test_acc_list

                epoch_time_list.append(d_t)
                self.content['ave_epoch_time'] = float(np.average(epoch_time_list))

                with open(self.log_path, 'w') as f:
                    json.dump(self.content,f)

                print("save the log file in ",self.log_path)

                #----display training results
                print("Epoch: ",epoch)
                print("training loss:{}, accuracy:{}".format(train_loss,train_acc))
                if self.test_img_dir is not None:
                    print("test set accuracy:{}".format( test_acc))

                print("Epoch time consumption:",d_t)
Exemplo n.º 47
0
def main(gpu_id = None):
    """Train ConvModelTop on generated stimuli and keep the best-scoring model.

    Every par['print_iter'] iterations the training loss is recorded and the
    loss curve is redrawn; every par['save_iter'] iterations (checked only on
    print iterations) a test batch is evaluated. Whenever the test loss
    improves, the output plots, a stats pickle and a checkpoint are written.
    Training stops early once the test loss drops below 50.

    :param gpu_id: value assigned to CUDA_VISIBLE_DEVICES; when None the
                   model is placed on '/cpu:0' instead of '/gpu:0'.
    """

    # Select gpu
    if gpu_id is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu_id

    # Reset Tensorflow graph
    tf.reset_default_graph()

    # Placeholders for the tensorflow model
    x = tf.placeholder(tf.float32, shape=[par['batch_train_size'],*par['n_input']],name='input')
    y = tf.placeholder(tf.float32, shape=[par['batch_train_size'],par['n_output']],name='target')

    # Generate stimulus
    stim = Stimulus()

    # Model stats
    losses = []
    testing_losses = []
    save_iter = []

    run_prefix = par['save_dir']+'run_'+str(par['run_number'])

    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:

        device = '/cpu:0' if gpu_id is None else '/gpu:0'
        with tf.device(device):
            model = ConvModelTop(x,y)

        init = tf.global_variables_initializer()
        sess.run(init)
        saver = tf.train.Saver()

        # Best test loss seen so far; start high so the first test wins
        prev_loss = 1000000
        start = time.time()
        for i in range(par['num_iterations']):

            # Generate training batch and train model
            input_data, target_data, _ = stim.generate_train_batch()
            feed_dict = {x: input_data, y: target_data}
            _, train_loss, model_output = sess.run([model.train_op, model.loss, model.output], feed_dict=feed_dict)

            # Check current status
            if i % par['print_iter'] == 0:

                # Print current status
                print_conv_stats(i, train_loss, time.time()-start)
                losses.append(train_loss)
                save_iter.append(i)

                # Test and save model
                if i % par['save_iter'] == 0:

                    # Generate test batch and get model performance
                    test_input, test_target, _ = stim.generate_test_batch()
                    feed_dict = {x: test_input, y: test_target}
                    test_loss, test_output = sess.run([model.loss, model.output], feed_dict=feed_dict)
                    testing_losses.append(test_loss)

                    # Plot model outputs
                    if test_loss < prev_loss:
                        prev_loss = test_loss
                        plot_conv_outputs(target_data, model_output, test_target, test_output, i)

                        # Save training stats and model.
                        # GLOBAL_VARIABLES replaces the deprecated VARIABLES key.
                        weight = sess.run(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, 'filters/kernel')[0])
                        stats = {'iter':save_iter,'weight':weight,'losses': losses, 'test_loss': testing_losses, 'last_iter': i}
                        # Context manager so the pickle file is flushed and closed
                        with open(run_prefix+'_model_stats.pkl', 'wb') as stats_file:
                            pickle.dump(stats, stats_file)

                        saved_path = saver.save(sess, './conv_model_top')
                        print('model saved in {}'.format(saved_path))

                    # Stop training
                    if test_loss < 50:
                        break

                # Plot loss curve (the first recorded loss is left out)
                if i > 0:
                    plt.plot(losses[1:])
                    plt.savefig(run_prefix+'_training_curve.png')
                    plt.close()
Exemplo n.º 48
0
def train(sess, loss, x_train, y_train,
          init_all=False, evaluate=None, feed=None, args=None,
          rng=None, var_list=None, fprop_args=None, optimizer=None,
          devices=None, x_batch_preprocessor=None, use_ema=False,
          ema_decay=.998, run_canary=None,
          loss_threshold=1e5, dataset_train=None, dataset_size=None):
  """
  Run (optionally multi-replica, synchronous) training to minimize `loss`
  :param sess: TF session to use when training the graph
  :param loss: object whose `fprop(x, y, **fprop_args)` returns the loss
               tensor to minimize
  :param x_train: numpy array with training inputs
                  (must be None when `dataset_train` is given)
  :param y_train: numpy array with training outputs
                  (must be None when `dataset_train` is given)
  :param init_all: (boolean) If set to true, all TF variables in the session
                   are (re)initialized, otherwise only previously
                   uninitialized variables are initialized before training.
  :param evaluate: function that is run after each training epoch
                   (typically to display the test/validation accuracy).
  :param feed: An optional dictionary that is appended to the feeding
               dictionary before the session runs. Can be used to feed
               the learning phase of a Keras model for instance.
  :param args: dict or argparse `Namespace` object.
               Should contain `nb_epochs`, `learning_rate`,
               `batch_size`
  :param rng: Instance of numpy.random.RandomState
  :param var_list: Optional list of parameters to train.
      Note that the EMA code below iterates over `var_list`, so it has to
      be given explicitly when `use_ema` is true.
  :param fprop_args: dict, extra arguments to pass to fprop (loss and model).
  :param optimizer: Optimizer to be used for training
  :param devices: list of device names to use for training
      If None, defaults to: all GPUs, if GPUs are available
                            all devices, if no GPUs are available
  :param x_batch_preprocessor: callable
      Takes a single tensor containing an x_train batch as input
      Returns a single tensor containing an x_train batch as output
      Called to preprocess the data before passing the data to the Loss
  :param use_ema: bool
      If true, uses an exponential moving average of the model parameters
  :param ema_decay: float or callable
      The decay parameter for EMA, if EMA is used
      If a callable rather than a float, this is a callable that takes
      the epoch and batch as arguments and returns the ema_decay for
      the current batch.
  :param run_canary: deprecated, only triggers a warning when not None.
  :param loss_threshold: float
      Raise an exception if the loss exceeds this value.
      This is intended to rapidly detect numerical problems.
      Sometimes the loss may legitimately be higher than this value. In
      such cases, raise the value. If needed it can be np.inf.
  :param dataset_train: tf Dataset instance.
      Used as a replacement for x_train, y_train for faster performance.
  :param dataset_size: integer, the size of the dataset_train.
  :raises ValueError: on missing arguments, on an optimizer of the wrong
      type, or when the loss is NaN/Inf or exceeds `loss_threshold`.
  :return: True if model trained
  """

  # Check whether the hardware is working correctly
  canary.run_canary()
  if run_canary is not None:
    warnings.warn("The `run_canary` argument is deprecated. The canary "
                  "is now much cheaper and thus runs all the time. The "
                  "canary now uses its own loss function so it is not "
                  "necessary to turn off the canary when training with "
                  " a stochastic loss. Simply quit passing `run_canary`."
                  "Passing `run_canary` may become an error on or after "
                  "2019-10-16.")

  args = _ArgsWrapper(args or {})
  fprop_args = fprop_args or {}

  # Check that necessary arguments were given (see doc above)
  # Be sure to support 0 epochs for debugging purposes
  if args.nb_epochs is None:
    raise ValueError("`args` must specify number of epochs")
  if optimizer is None:
    if args.learning_rate is None:
      raise ValueError("Learning rate was not given in args dict")
  assert args.batch_size, "Batch size was not given in args dict"

  if rng is None:
    rng = np.random.RandomState()

  if optimizer is None:
    optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
  else:
    if not isinstance(optimizer, tf.train.Optimizer):
      raise ValueError("optimizer object must be from a child class of "
                       "tf.train.Optimizer")

  grads = []
  xs = []
  preprocessed_xs = []
  ys = []
  if dataset_train is not None:
    assert x_train is None and y_train is None and x_batch_preprocessor is None
    if dataset_size is None:
      raise ValueError("You must provide a dataset size")
    data_iterator = dataset_train.make_one_shot_iterator().get_next()
    # Pull one batch so that dtype and shape are known for the placeholders
    x_train, y_train = sess.run(data_iterator)

  # One replica of the loss (with its own placeholders) per device
  devices = infer_devices(devices)
  for device in devices:
    with tf.device(device):
      x = tf.placeholder(x_train.dtype, (None,) + x_train.shape[1:])
      y = tf.placeholder(y_train.dtype, (None,) + y_train.shape[1:])
      xs.append(x)
      ys.append(y)

      if x_batch_preprocessor is not None:
        x = x_batch_preprocessor(x)

      # We need to keep track of these so that the canary can feed
      # preprocessed values. If the canary had to feed raw values,
      # stochastic preprocessing could make the canary fail.
      preprocessed_xs.append(x)

      loss_value = loss.fprop(x, y, **fprop_args)

      grads.append(optimizer.compute_gradients(
          loss_value, var_list=var_list))
  num_devices = len(devices)
  print("num_devices: ", num_devices)

  grad = avg_grads(grads)
  # Trigger update operations within the default graph (such as batch_norm).
  with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
    train_step = optimizer.apply_gradients(grad)

  epoch_tf = tf.placeholder(tf.int32, [])
  batch_tf = tf.placeholder(tf.int32, [])

  if use_ema:
    if callable(ema_decay):
      ema_decay = ema_decay(epoch_tf, batch_tf)
    ema = tf.train.ExponentialMovingAverage(decay=ema_decay)
    with tf.control_dependencies([train_step]):
      train_step = ema.apply(var_list)
    # Get pointers to the EMA's running average variables
    avg_params = [ema.average(param) for param in var_list]
    # Make temporary buffers used for swapping the live and running average
    # parameters
    tmp_params = [tf.Variable(param, trainable=False)
                  for param in var_list]
    # Define the swapping operation
    param_to_tmp = [tf.assign(tmp, param)
                    for tmp, param in safe_zip(tmp_params, var_list)]
    with tf.control_dependencies(param_to_tmp):
      avg_to_param = [tf.assign(param, avg)
                      for param, avg in safe_zip(var_list, avg_params)]
    with tf.control_dependencies(avg_to_param):
      tmp_to_avg = [tf.assign(avg, tmp)
                    for avg, tmp in safe_zip(avg_params, tmp_params)]
    swap = tmp_to_avg

  batch_size = args.batch_size

  assert batch_size % num_devices == 0
  device_batch_size = batch_size // num_devices

  if init_all:
    sess.run(tf.global_variables_initializer())
  else:
    initialize_uninitialized_global_variables(sess)

  for epoch in xrange(args.nb_epochs):
    if dataset_train is not None:
      nb_batches = int(math.ceil(float(dataset_size) / batch_size))
    else:
      # Indices to shuffle training set
      index_shuf = list(range(len(x_train)))
      # Randomly repeat a few training examples each epoch to avoid
      # having a too-small batch
      while len(index_shuf) % batch_size != 0:
        index_shuf.append(rng.randint(len(x_train)))
      nb_batches = len(index_shuf) // batch_size
      rng.shuffle(index_shuf)
      # Shuffling here versus inside the loop doesn't seem to affect
      # timing very much, but shuffling here makes the code slightly
      # easier to read
      x_train_shuffled = x_train[index_shuf]
      y_train_shuffled = y_train[index_shuf]

    prev = time.time()
    for batch in range(nb_batches):
      if dataset_train is not None:
        x_train_shuffled, y_train_shuffled = sess.run(data_iterator)
        start, end = 0, batch_size
      else:
        # Compute batch start and end indices
        start = batch * batch_size
        end = (batch + 1) * batch_size
        # Perform one training step
        diff = end - start
        assert diff == batch_size

      # Give every device its own contiguous slice of the batch
      feed_dict = {epoch_tf: epoch, batch_tf: batch}
      for dev_idx in xrange(num_devices):
        cur_start = start + dev_idx * device_batch_size
        cur_end = start + (dev_idx + 1) * device_batch_size
        feed_dict[xs[dev_idx]] = x_train_shuffled[cur_start:cur_end]
        feed_dict[ys[dev_idx]] = y_train_shuffled[cur_start:cur_end]
      if cur_end != end and dataset_train is None:
        msg = ("batch_size (%d) must be a multiple of num_devices "
               "(%d).\nCUDA_VISIBLE_DEVICES: %s"
               "\ndevices: %s")
        # Use .get() so an unset variable cannot turn this report into a
        # KeyError, and keep the wrapped `args` object intact.
        msg_args = (batch_size, num_devices,
                    os.environ.get('CUDA_VISIBLE_DEVICES'),
                    str(devices))
        raise ValueError(msg % msg_args)
      if feed is not None:
        feed_dict.update(feed)

      # loss_value is the loss tensor of the last device in `devices`
      _, loss_numpy = sess.run(
          [train_step, loss_value], feed_dict=feed_dict)

      if np.abs(loss_numpy) > loss_threshold:
        raise ValueError("Extreme loss during training: " + str(loss_numpy))
      if np.isnan(loss_numpy) or np.isinf(loss_numpy):
        raise ValueError("NaN/Inf loss during training")
    assert (dataset_train is not None or
            end == len(index_shuf))  # Check that all examples were used
    cur = time.time()
    _logger.info("Epoch " + str(epoch) + " took " +
                 str(cur - prev) + " seconds")
    if evaluate is not None:
      if use_ema:
        # Before running evaluation, load the running average
        # parameters into the live slot, so we can see how well
        # the EMA parameters are performing
        sess.run(swap)
      evaluate()
      if use_ema:
        # Swap the parameters back, so that we continue training
        # on the live parameters
        sess.run(swap)
  if use_ema:
    # When training is done, swap the running average parameters into
    # the live slot, so that we use them when we deploy the model
    sess.run(swap)

  return True
Exemplo n.º 49
0
def train():
    """Train the model on randomly chosen buckets.

    Reads the bucket databases from FLAGS.buckets_dir, then runs
    FLAGS.num_epoch + 1599 epochs. An epoch ends once at least
    FLAGS.num_per_epoch samples have been trained; a progress bar is
    redrawn on stdout after every batch. The model is saved to
    FLAGS.model_dir every 800 epochs.
    """
    print('数据准备中...')
    bucket_dbs = data_utils.read_bucket_dbs(FLAGS.buckets_dir)
    bucket_sizes = []
    for i in range(len(buckets)):
        bucket_size = bucket_dbs[i].size
        bucket_sizes.append(bucket_size)
        print('bucket {} 中有数据 {} 条'.format(i, bucket_size))
    total_size = sum(bucket_sizes)
    print('共有数据 {} 条'.format(total_size))

    with tf.Session() as sess:
        model = create_model(sess, False)
        sess.run(tf.global_variables_initializer())
        # Cumulative share of the data held by each bucket, used to sample
        # a bucket with probability proportional to its size
        buckets_scale = [sum(bucket_sizes[:i + 1]) / total_size for i in range(len(bucket_sizes))]
        # Template of the console progress line
        metrics = '  '.join([
            '\r[{}]',
            '{:.1f}%',
            '{}/{}',
            'loss={:.3f}',
            '{}/{}'
        ])
        bars_max = 20
        # Make sure the checkpoint directory exists before the first save
        if not os.path.exists(FLAGS.model_dir):
            os.makedirs(FLAGS.model_dir)
        with tf.device('/gpu:0'):
            for epoch_index in range(1, FLAGS.num_epoch + 1600):
                print('Epoch {}:'.format(epoch_index))
                time_start = time.time()
                epoch_trained = 0 # samples trained so far in this epoch
                batch_loss = []
                while True:
                    # Randomly pick the bucket to train on
                    random_number = np.random.random_sample()
                    bucket_id = min(i for i, scale in enumerate(buckets_scale) if scale > random_number)
                    # Fetch a batch of question/answer pairs; the second
                    # return value is not used here
                    data, _data_in = model.get_batch_data(
                        bucket_dbs,
                        bucket_id
                    )
                    # Convert the pairs into the format the model trains on
                    encoder_inputs, decoder_inputs, decoder_weights = model.get_batch(
                        bucket_dbs,
                        bucket_id,
                        data
                    )
                    # Training step
                    _, step_loss, output = model.step(
                        sess,
                        encoder_inputs,
                        decoder_inputs,
                        decoder_weights,
                        bucket_id,
                        False
                    )
                    epoch_trained += FLAGS.batch_size
                    batch_loss.append(step_loss)
                    time_now = time.time()
                    time_spend = time_now - time_start
                    progress = epoch_trained / FLAGS.num_per_epoch
                    time_estimate = time_spend / progress
                    # Clamp the ratio (not the already tiny ratio against
                    # 100) so the bar never grows past bars_max
                    percent = min(1, progress) * 100
                    bars = math.floor(percent / 100 * bars_max)
                    sys.stdout.write(metrics.format(
                        '=' * bars + '-' * (bars_max - bars),
                        percent,
                        epoch_trained, FLAGS.num_per_epoch,
                        np.mean(batch_loss),
                        data_utils.time(time_spend), data_utils.time(time_estimate)
                    ))
                    sys.stdout.flush()
                    if epoch_trained >= FLAGS.num_per_epoch:
                        break
                print('\n')

                # Periodic checkpoint; this check has to live inside the
                # epoch loop, otherwise it only ever sees the last index
                if epoch_index % 800 == 0:
                    model.saver.save(sess, os.path.join(FLAGS.model_dir, FLAGS.model_name))
def libchatbot(save_dir='models/personality', max_length=500, beam_width=2,
        relevance=-1., temperature=1.0, topn=-1):
    """Load a saved model and return callables for chatting with it.

    The training-time arguments and the vocabulary are unpickled from the
    paths that get_paths(save_dir) reports, the network is rebuilt with
    batch_size set to beam_width, and its weights are restored into a
    session that stays open for the lifetime of the returned callables.

    Returns the tuple (save_states, load_states, reset_states, consumer):
    consumer(text) feeds the user's text through the network and returns
    the generated reply; the other three pickle, unpickle and reinitialise
    the conversation state.
    """
    model_path, config_path, vocab_path = get_paths(save_dir)
    # NOTE: pickle.load executes whatever the file contains - only point
    # save_dir at model directories you trust.
    with open(config_path, 'rb') as config_file:
        saved_args = pickle.load(config_file)
    with open(vocab_path, 'rb') as vocab_file:
        chars, vocab = pickle.load(vocab_file)

    # Rebuild the network with the stored arguments (second argument True,
    # as in the original call), one batch row per beam.
    print("Creating model...")
    saved_args.batch_size = beam_width
    net = Model(saved_args, True)

    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    # Quieten the TensorFlow C++ logging
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    session = tf.Session(config=session_config)
    # Entered by hand and never exited: the session must outlive this call
    session.__enter__()
    tf.global_variables_initializer().run()
    saver = tf.train.Saver(net.save_variables_list())
    # The restored values overwrite the freshly initialised ones
    print("Restoring weights...")
    saver.restore(session, model_path)

    # Mutable holder shared by all returned callables
    args = {
        'session': session,
        'states': initial_state_with_relevance_masking(net, session, relevance),
    }

    def consumer(text, args=args, net=net, vocab=vocab, max_length=max_length,
            relevance=relevance, temperature=temperature, beam_width=beam_width, topn=topn):
        states = args['states']
        session = args['session']

        # Feed the user's line, framed with the "> " prompt markers
        prompt = sanitize_text(vocab, "> " + text + "\n>")
        states = forward_text(net, session, states, relevance, vocab, prompt)

        sampling_options = {'relevance':relevance, 'mask_reset_token':vocab['\n'],
                            'forbidden_token':vocab['>'],
                            'temperature':temperature, 'topn':topn}
        response_tokens = beam_search_generator(sess=session, net=net,
            initial_state=copy.deepcopy(states), initial_sample=vocab[' '],
            early_term_token=vocab['\n'], beam_width=beam_width, forward_model_fn=forward_with_mask,
            forward_args=sampling_options)

        pieces = []
        for position, char_token in enumerate(response_tokens):
            piece = chars[char_token]
            pieces.append(piece)
            # Keep the conversation state in step with what was emitted
            states = forward_text(net, session, states, relevance, vocab, piece)
            if position >= max_length:
                break

        # Close the reply and open the next user prompt
        states = forward_text(net, session, states, relevance, vocab, sanitize_text(vocab, "\n> "))

        args['states'] = states
        args['session'] = session
        return ''.join(pieces)

    def save_states(name):
        with open(name + '.pkl', 'wb') as state_file:
            pickle.dump(args['states'], state_file)

    def load_states(name):
        with open(name + '.pkl', 'rb') as state_file:
            args['states'] = pickle.load(state_file)

    def reset_states(net=net, relevance=relevance):
        args['states'] = initial_state_with_relevance_masking(net, args['session'], relevance)

    return save_states, load_states, reset_states, consumer
# Sample inputs: single words and short sentences.
# NOTE(review): `word`, `word2` and `embed` are not defined in this fragment;
# they are presumably set up earlier in the script - confirm.
word3 = "man"
word4 = "girl"

sentence = "I am red"
sentence2 = "I am blue"
sentence3 = "I am green"

messages = [word, word2, word3, word4, sentence, sentence2, sentence3]

# Reduce logging output.
tf.logging.set_verbosity(tf.logging.ERROR)

# Collects one embedding (as a plain Python list) per message, in order.
list_embedding = []

with tf.Session() as session:
   session.run([tf.global_variables_initializer(), tf.tables_initializer()])
   # Embed all messages in a single run
   message_embeddings = session.run(embed(messages))

   for i, message_embedding in enumerate(np.array(message_embeddings).tolist()):
       print("Message: {}".format(messages[i]))
       print("Embedding size: {}".format(len(message_embedding)))
       list_embedding.append(message_embedding)
       # Only the first three components are printed
       message_embedding_snippet = ", ".join(
           (str(x) for x in message_embedding[:3]))
       print("Embedding: [{}, ...]\n".format(message_embedding_snippet))
       # Every message, whatever its length, has been given one embedding.

# Rebinding `messages` here does not affect the embeddings stored above.
messages = ["That band rocks!", "That song is really cool."]

# Vector arithmetic on the stored embeddings:
# embedding(word) - embedding("man") + embedding("girl")
temp = np.array(list_embedding[0]) - np.array(list_embedding[2]) + np.array(list_embedding[3])
Exemplo n.º 52
0
def train(data_dir, batch_size, net_name,
          epochs, start_epoch, start_iter, change_train_data_epoch,
          learning_rate, decay_rate, decay_steps,
          val_rate, save_rate,
          checkpoint_dir, log_dir,
          training):
    """Build the YOLOv3 training graph and run the two-phase training loop.

    Epochs below ``change_train_data_epoch`` draw batches from
    ``dataloader.train_1w_loader()``; all later epochs draw from
    ``dataloader.train_b_loader()``.  A checkpoint is written every time the
    training loss reaches a new minimum and once more when training ends.

    Args:
        data_dir: Forwarded to ``DataSet``.
        batch_size: Forwarded to ``DataSet`` and ``YOLOv3``; also used to
            derive the number of iterations per epoch.
        net_name: Name of the attribute of ``net`` that is called to build the
            backbone model.
        epochs: Training stops once the epoch counter reaches this value.
        start_epoch: Epoch counter to start from.
        start_iter: Iteration counter to start from inside the first epoch.
        change_train_data_epoch: First epoch that uses the second data loader.
        learning_rate: Initial learning rate for the exponential decay.
        decay_rate: Decay rate of the exponential schedule.
        decay_steps: Decay steps of the exponential schedule.
        val_rate: Only logged; the validation pass is currently disabled.
        save_rate: Only logged; checkpoints are saved on loss improvement.
        checkpoint_dir: Directory checkpoints are restored from / written to.
            It is created when it does not exist.
        log_dir: Parent directory of the ``train`` summary directory.
        training: Forwarded to ``DataSet``, the backbone and ``YOLOv3``.
    """

    print('==> Get train and test data...')
    dataloader = DataSet(data_dir, batch_size, training)
    train_1w_batch = dataloader.train_1w_loader()
    train_b_batch = dataloader.train_b_loader()
    train_data_size_list = np.array([dataloader.nbr_train_1w, dataloader.nbr_train_b])
    print('==> Finished!')

    print('==> Create YOLOv3')
    print('--- use ', net_name)
    inputs_x = tf.placeholder(tf.float32, [None, cfg.IMG_HEIGHT, cfg.IMG_WIDTH, 3])
    model = getattr(net, net_name)(inputs_x, training)

    # One label placeholder per detection scale.
    total_grid_cell_attr = 5 + cfg.NUM_CLASS
    inputs_y = [
        tf.placeholder(tf.float32,
                       [None, cfg.SCALES[idx], cfg.SCALES[idx], total_grid_cell_attr])
        for idx in range(3)
    ]
    yolo_v3 = YOLOv3(model, inputs_y, batch_size=batch_size, is_training=training)
    print('==> Finished!')

    print('==> Get each scale total loss')
    loss = yolo_v3.loss
    print('==> Finished!')

    print('==> Create optimizer')
    print('--- epochs = %d' % epochs)
    print('--- train_data_size = ', train_data_size_list)
    print('--- learning_rate = %f' % learning_rate)

    print('--- update learning_rate: ')
    print('--- \tlearning_rate = learning_rate * decay_rate^(global_step / decay_step)')
    print('--- decay_rate = %f' % decay_rate)

    # Total number of steps of each data phase; index 0 is the first loader.
    total_step_list = [change_train_data_epoch * np.ceil(train_data_size_list[0] / batch_size),
                       (epochs - change_train_data_epoch) * np.ceil(train_data_size_list[1] / batch_size)]
    print('--- total_step = ', total_step_list)

    print('--- start_epochs = %d' % start_epoch)

    train_iter_max_list = np.ceil(train_data_size_list / batch_size)
    print('--- train iter_max = ', train_iter_max_list)

    # NOTE(review): the value logged here and the value the variable below is
    # initialised with differ once start_epoch >= change_train_data_epoch; both
    # expressions are kept as they were -- confirm which one is intended.
    if start_epoch < change_train_data_epoch:
        logged_step = start_epoch * train_iter_max_list[0] + start_iter
    else:
        logged_step = change_train_data_epoch * train_iter_max_list[0] + start_iter
    print('--- global_step = %d' % logged_step)
    global_step = tf.Variable(start_epoch * train_iter_max_list[0] + start_iter, trainable=False)

    print('change train data epoch = %d' % (change_train_data_epoch))  # [0,0,1],[0,1,0],[1,0,0]/ [1,0,1],[0,1,0],[1,0,1]

    # learning_rate = learning_rate * decay_rate^(global_step / decay_steps)
    learning_rate = tf.train.exponential_decay(learning_rate, global_step,
                                               decay_steps, decay_rate, staircase=False)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    # minimize() increments global_step on every run of train_op, so the
    # training loop must not touch it.
    train_op = optimizer.minimize(loss, global_step=global_step)
    print('==> Finished!')

    init_op = tf.group(tf.global_variables_initializer())

    saver = tf.train.Saver(tf.global_variables())

    # Saver.save needs the target directory to exist, and os.listdir below
    # would raise on a missing directory.
    if not os.path.isdir(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    train_writer = tf.summary.FileWriter(os.path.join(log_dir, 'train'), flush_secs=60)

    def _ckpt_prefix(epoch_idx, iter_idx, loss_value):
        # Checkpoint path prefix encoding the position and the loss.
        file_name = ('yolov3.ckpt' + '-epoch_' + str(epoch_idx) + '_' +
                     str(iter_idx) + '-loss_' + '%.3f' % loss_value)
        return os.path.join(checkpoint_dir, file_name)

    try:
        with tf.Session() as sess:
            print('==> Load checkpoing')
            # Only look for a checkpoint when the directory holds at least four
            # files (same heuristic as before).
            ckpt = None
            if len(os.listdir(checkpoint_dir)) >= 4:
                ckpt = tf.train.get_checkpoint_state(checkpoint_dir)

            if ckpt and ckpt.model_checkpoint_path:
                print('--> Restoring checkpoint from: ' + checkpoint_dir)
                saver.restore(sess, ckpt.model_checkpoint_path)
                sess.run(tf.local_variables_initializer())
                print('==> Load finished!')
            else:
                # Also reached when the directory has files but no usable
                # checkpoint state; previously that left every variable
                # uninitialised.
                print('==> No checkpoint, start training new model')
                print('==> Init global variables')

                sess.run(init_op)
                train_writer.add_graph(sess.graph)

                print('==> Init finished!')

            print('==> Training start')

            epoch = start_epoch
            print('--- epoch = %d' % epoch)

            iter_idx = start_iter
            print('--- iter = %d' % iter_idx)

            print('--- save_rate = %d' % save_rate)

            print('--- val_rate = %d' % val_rate)

            start_time = time.time()

            step = 0
            total_loss = 0
            step_loss = 100000  # lowest training loss seen so far

            while epoch < epochs:
                # Pick the data phase once per epoch so that resuming directly
                # in the second phase also gets the right totals.
                phase = 1 if epoch >= change_train_data_epoch else 0
                batch_source = train_b_batch if phase else train_1w_batch
                total_step = total_step_list[phase]
                train_iter_max = train_iter_max_list[phase]

                while iter_idx < train_iter_max:
                    batch = next(batch_source)

                    feed_dict = {
                        inputs_x: batch[0],
                        inputs_y[0]: batch[1][0],
                        inputs_y[1]: batch[1][1],
                        inputs_y[2]: batch[1][2]
                    }

                    _, total_loss = sess.run([train_op, loss], feed_dict=feed_dict)
                    eta = remain_time(start_time, total_step, step)
                    print('--- Epoch {}, Iter {}, ETA {:.2f}m, loss {:.3f}'.format(epoch, iter_idx, eta, total_loss))

                    # NOTE: the periodic validation pass (every val_rate steps,
                    # keeping the checkpoint with the best validation loss) is
                    # disabled; checkpoints are driven by the training loss.
                    if total_loss < step_loss:
                        step_loss = total_loss
                        saver.save(sess, save_path=_ckpt_prefix(epoch, iter_idx, total_loss),
                                   global_step=step)
                        print('--- save checkpoint loss: %.3f' % total_loss)

                    start_time = time.time()

                    step += 1
                    iter_idx += 1

                iter_idx = 0
                epoch += 1

            # total_loss is a scalar (it is formatted as one above), so it
            # must not be indexed here.
            saver.save(sess, save_path=_ckpt_prefix(epoch, iter_idx, total_loss),
                       global_step=step)
            print('--- save checkpoint loss: %.3f' % total_loss)

            print('==> Training Finished!')
    finally:
        # Flush pending events and release the event file.
        train_writer.close()
Exemplo n.º 53
0
def _mean_distance_to_others(embeddings):
    """Return, per row, the mean Euclidean distance to all other rows."""
    count = embeddings.shape[0]
    # A single row has no "others"; dividing by 1 yields 0 instead of NaN.
    others = max(count - 1, 1)
    ave_dis = np.zeros(count, dtype=np.float32)
    for idx, embedding in enumerate(embeddings):
        distance = np.sqrt(np.sum(np.square(embeddings - embedding), axis=1))
        # The distance of a row to itself is 0, so the sum only counts the others.
        ave_dis[idx] = np.sum(distance) / others
    return ave_dis


def img_removal_by_embed(root_dir,
                         output_dir,
                         pb_path,
                         node_dict,
                         threshold=0.7,
                         type='copy',
                         GPU_ratio=None,
                         dataset_range=None):
    """Copy or move images whose embedding is far from the rest of their folder.

    Every sub-directory of ``root_dir`` is processed on its own: its images
    are embedded with the model restored from ``pb_path``, the mean Euclidean
    distance of each embedding to the other embeddings of the same folder is
    computed, and images whose mean distance exceeds ``threshold`` are
    reported and copied/moved to ``output_dir/<sub-directory name>``.

    Args:
        root_dir: Directory whose immediate sub-directories are scanned.
        output_dir: Directory that receives one sub-folder per scanned folder.
        pb_path: Forwarded to ``model_restore_from_pb``.
        node_dict: Forwarded to ``model_restore_from_pb``; the returned dict
            must provide the ``'input'``, ``'phase_train'`` and
            ``'embeddings'`` entries.
        threshold: Mean distance above which an image is selected.
        type: ``"copy"`` or ``"move"``; any other value only prints the
            selected images.
        GPU_ratio: Forwarded to ``model_restore_from_pb``.
        dataset_range: Optional ``(start, stop)`` slice applied to the list of
            sub-directories.

    Returns:
        None.
    """
    # ----var
    img_format = {"png", 'jpg', 'bmp'}
    batch_size = 64

    # ----collect all folders
    dirs = [obj.path for obj in os.scandir(root_dir) if obj.is_dir()]
    if len(dirs) == 0:
        print("No sub-dirs in ", root_dir)
        return

    # ----dataset range
    if dataset_range is not None:
        dirs = dirs[dataset_range[0]:dataset_range[1]]

    # ----model init
    sess, tf_dict = model_restore_from_pb(pb_path,
                                          node_dict,
                                          GPU_ratio=GPU_ratio)
    tf_input = tf_dict['input']
    tf_phase_train = tf_dict['phase_train']
    tf_embeddings = tf_dict['embeddings']
    model_shape = [None, 160, 160, 3]
    feed_dict = {tf_phase_train: False}

    # ----process each folder
    for dir_path in dirs:
        # Extensions are compared case-insensitively so "a.JPG" is picked up too.
        paths = [
            file.path for file in os.scandir(dir_path)
            if file.name.split(".")[-1].lower() in img_format
        ]
        len_path = len(paths)
        if len_path == 0:
            print("No images in ", dir_path)
            continue

        # ----create the sub folder in the output folder
        # os.path.basename works with the separators of the running platform;
        # splitting on "\\" returned the whole path on POSIX, which made
        # os.path.join discard output_dir.
        save_dir = os.path.join(output_dir, os.path.basename(dir_path))
        os.makedirs(save_dir, exist_ok=True)

        # ----calculate embeddings
        ites = math.ceil(len_path / batch_size)
        embeddings = np.zeros([len_path, tf_embeddings.shape[-1]],
                              dtype=np.float32)
        for idx in range(ites):
            num_start = idx * batch_size
            num_end = min(num_start + batch_size, len_path)
            # ----read batch data
            batch_dim = [num_end - num_start] + model_shape[1:]
            batch_data = np.zeros(batch_dim, dtype=np.float32)
            for idx_path, path in enumerate(paths[num_start:num_end]):
                img = cv2.imread(path)
                if img is None:
                    # The slot stays all-zero, as before.
                    print("Read failed:", path)
                else:
                    img = cv2.resize(img, (model_shape[2], model_shape[1]))
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    batch_data[idx_path] = img
            batch_data /= 255  # norm
            feed_dict[tf_input] = batch_data
            embeddings[num_start:num_end] = sess.run(tf_embeddings,
                                                     feed_dict=feed_dict)

        # ----calculate ave distance of each image
        ave_dis = _mean_distance_to_others(embeddings)

        # ----remove or copy images
        for idx, path in enumerate(paths):
            if ave_dis[idx] > threshold:
                print("path:{}, ave_distance:{}".format(path, ave_dis[idx]))
                if type in ("copy", "move"):
                    save_path = os.path.join(save_dir, os.path.basename(path))
                    transfer = shutil.copy if type == "copy" else shutil.move
                    transfer(path, save_path)
Exemplo n.º 54
0
def train():
    """Build the multi-GPU training graph and run the training loop.

    Each batch is split into a labelled part (``FLAGS.label_ratio`` of
    ``FLAGS.batch_size``) and an unlabelled part; both are split across
    ``FLAGS.num_gpus`` towers, the tower gradients are averaged and applied
    together with an exponential moving average of the trainable variables.
    Statistics are printed whenever ``step`` is a positive multiple of the
    number of iterations per epoch, summaries are written every 100 steps and
    a checkpoint is saved after the last step.
    """
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        assert FLAGS.batch_size % FLAGS.num_gpus == 0, ('Batch size must be divisible by number of GPUs')

        # Labelled / unlabelled share of every batch (floats until cast below).
        bs_l = FLAGS.batch_size * FLAGS.label_ratio
        bs_u = FLAGS.batch_size * (1 - FLAGS.label_ratio)
        num_iter_per_epoch = int(FLAGS.num_train_u / bs_u)
        max_steps = int(FLAGS.num_epochs * num_iter_per_epoch)
        num_classes = FLAGS.num_classes

        global_step = slim.create_global_step()
        # The learning rate is fed on every step (see utils.decay_lr below).
        lr = tf.placeholder(tf.float32, shape=[], name="learning_rate")
        opt = tf.train.MomentumOptimizer(learning_rate=lr, momentum=0.9, use_nesterov=True)

        images_l, labels_l = utils.prepare_traindata(FLAGS.dataset_dir_l, int(bs_l))
        images_u, labels_u = utils.prepare_traindata(FLAGS.dataset_dir_u, int(bs_u))

        images_splits_l = tf.split(images_l, FLAGS.num_gpus, 0)
        images_splits_u = tf.split(images_u, FLAGS.num_gpus, 0)
        labels_splits_l = tf.split(labels_l, FLAGS.num_gpus, 0)
        labels_splits_u = tf.split(labels_u, FLAGS.num_gpus, 0)

        # Per tower: labelled samples first, unlabelled samples after them.
        images_splits = []
        labels_splits = []
        for i in range(FLAGS.num_gpus):
            images_splits.append(tf.concat([images_splits_l[i], images_splits_u[i]], 0))
            labels_splits.append(tf.concat([labels_splits_l[i], labels_splits_u[i]], 0))

        tower_grads = []
        top_1_op = []
        memory_op = []
        reuse_variables = None
        for i in range(FLAGS.num_gpus):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('%s_%d' % (network.TOWER_NAME, i)) as scope:
                    with slim.arg_scope(slim.get_model_variables(scope=scope), device='/cpu:0'):
                        loss, loss_s, loss_m, labels, logits, memory_update = \
                            _build_training_graph(images_splits[i], labels_splits[i], num_classes, reuse_variables)

                        memory_op.append(memory_update)
                        top_1_op.append(tf.nn.in_top_k(logits, labels, 1))

                    # Every tower after the first one reuses the variables.
                    reuse_variables = True
                    # Overwritten on every iteration: after the loop these
                    # hold the collections of the last tower only.
                    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
                    batchnorm = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope)
                    grads = opt.compute_gradients(loss)
                    tower_grads.append(grads)

        grads = network.average_gradients(tower_grads)
        gradient_op = opt.apply_gradients(grads, global_step=global_step)

        var_averages = tf.train.ExponentialMovingAverage(FLAGS.ema_decay, global_step)
        var_op = var_averages.apply(tf.trainable_variables())

        batchnorm_op = tf.group(*batchnorm)
        train_op = tf.group(gradient_op, var_op, batchnorm_op)

        saver = tf.train.Saver(tf.global_variables(), max_to_keep=None)
        summary_op = tf.summary.merge(summaries)
        init_op = tf.global_variables_initializer()

        config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
        if FLAGS.gpu_memory:
            config.gpu_options.per_process_gpu_memory_fraction = FLAGS.gpu_memory
        sess = tf.Session(config=config)

        boundaries, values = utils.config_lr(max_steps)
        sess.run([init_op], feed_dict={lr: values[0]})

        tf.train.start_queue_runners(sess=sess)
        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, graph=sess.graph)

        iter_count = epoch = sum_loss = sum_loss_s = sum_loss_m = sum_top_1 = 0
        # Number of steps accumulated since the last report.  The first report
        # covers steps 0..num_iter_per_epoch (one step more than later ones),
        # so the sums are averaged over the real count, not a fixed one.
        window_steps = 0
        start = time.time()

        try:
            for step in range(max_steps):

                decayed_lr = utils.decay_lr(step, boundaries, values, max_steps)
                # `loss`, `loss_s` and `loss_m` are the tensors of the last tower.
                _, _, loss_value, loss_value_s, loss_value_m, top_1_value = \
                    sess.run([train_op, memory_op, loss, loss_s, loss_m, top_1_op], feed_dict={lr: decayed_lr})

                sum_loss += loss_value
                sum_loss_s += loss_value_s
                sum_loss_m += loss_value_m
                top_1_value = np.sum(top_1_value) / bs_l
                sum_top_1 += top_1_value
                iter_count += 1
                window_steps += 1

                assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
                assert not np.isnan(loss_value_s), 'Model diverged with loss = NaN'
                assert not np.isnan(loss_value_m), 'Model diverged with loss = NaN'

                if step % num_iter_per_epoch == 0 and step > 0:
                    end = time.time()
                    sum_loss = sum_loss / window_steps
                    sum_loss_s = sum_loss_s / window_steps
                    sum_loss_m = sum_loss_m / window_steps
                    sum_top_1 = min(sum_top_1 / window_steps, 1.0)
                    # Wall time per step averaged over the whole run so far.
                    time_per_iter = float(end - start) / iter_count
                    format_str = ('epoch %d, L = %.2f, Ls = %.2f, Lm = %.2f, top_1 = %.2f, lr = %.4f (time_per_iter: %.4f s)')
                    print(format_str % (epoch, sum_loss, sum_loss_s, sum_loss_m, sum_top_1*100, decayed_lr, time_per_iter))
                    epoch += 1
                    sum_loss = sum_loss_s = sum_loss_m = sum_top_1 = 0
                    window_steps = 0

                if step % 100 == 0:
                    summary_str = sess.run(summary_op, feed_dict={lr: decayed_lr})
                    summary_writer.add_summary(summary_str, step)

                if (step + 1) == max_steps:
                    checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=epoch)
        finally:
            # Flush pending events; without this the last summaries may
            # never reach the event file.
            summary_writer.close()
Exemplo n.º 55
0
def train(sess, env, actor4, critic4):
    """Run the actor/critic training loop with target networks and replay memory.

    For ``MAX_EPISODES`` episodes of at most ``MAX_STEPS_EPISODE`` steps the
    actor chooses an action (plus exploration noise while the episode index
    is below ``EXPLORATION_TIME``), the transition is stored in a replay
    buffer and, once the buffer holds more than ``MINIBATCH_SIZE`` samples,
    the critic, the actor and both target networks are updated from a
    sampled minibatch.  Per-episode reward and accumulated max-Q are written
    as summaries and appended to the module-level ``REWARD`` / ``QMAX`` lists.

    The print calls are written so that the block runs unchanged on
    Python 2 and Python 3.

    Args:
        sess: TensorFlow session used for initialisation and summaries.
        env: Environment providing ``reset``, ``step``, ``render`` and
            ``observation_space``.
        actor4: Actor network wrapper (``predict``, ``predict_target``,
            ``train``, ``update_target_network``, ``state_dim``,
            ``action_dim``).
        critic4: Critic network wrapper (``predict_target``, ``train``,
            ``action_gradients``, ``update_target_network``).
    """
    # Set up summary ops
    summary_ops, summary_vars = build_summaries()

    # Initialize Tensorflow variables
    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter(SUMMARY_DIR, sess.graph)

    # Initialize target network weights
    actor4.update_target_network()
    critic4.update_target_network()

    # Initialize replay memory
    replay_buffer = ReplayBuffer(BUFFER_SIZE, RANDOM_SEED)

    for i in range(MAX_EPISODES):
        s = env.reset()

        episode_reward = 0
        episode_ave_max_q = 0

        # One noise sample per step of the episode.
        noise = ExplorationNoise.ou_noise(OU_THETA, OU_MU, OU_SIGMA,
                                          MAX_STEPS_EPISODE)
        noise = ExplorationNoise.exp_decay(noise, EXPLORATION_TIME)

        for j in range(MAX_STEPS_EPISODE):

            if RENDER_ENV and i % 10 == 0:
                env.render()

            # Add exploratory noise to the action while the episode index is
            # below EXPLORATION_TIME; afterwards act greedily.
            a = actor4.predict(
                np.reshape(s, (1, env.observation_space.shape[0])))
            if i < EXPLORATION_TIME:
                a = a + noise[j]

            s2, r, terminal, info = env.step(a)

            if i % 10 == 0:
                print(a)

            replay_buffer.add(np.reshape(s, actor4.state_dim),
                              np.reshape(a, actor4.action_dim), r, terminal,
                              np.reshape(s2, actor4.state_dim))

            # Keep adding experience to the memory until
            # there are at least minibatch size samples
            if replay_buffer.size() > MINIBATCH_SIZE:
                s_batch, a_batch, r_batch, t_batch, s2_batch = \
                    replay_buffer.sample_batch(MINIBATCH_SIZE)

                # Calculate targets
                target_q = critic4.predict_target(
                    s2_batch, actor4.predict_target(s2_batch))

                # Terminal transitions use the reward only, all others add
                # the discounted target-network Q value.
                y_i = [
                    r_batch[k] if t_batch[k]
                    else r_batch[k] + GAMMA * target_q[k]
                    for k in range(MINIBATCH_SIZE)
                ]

                # Update the critic given the targets
                predicted_q_value, _ = \
                    critic4.train(s_batch, a_batch, np.reshape(y_i, (MINIBATCH_SIZE, 1)))

                episode_ave_max_q += np.amax(predicted_q_value)

                # Update the actor policy using the sampled gradient
                a_outs = actor4.predict(s_batch)
                a_grads = critic4.action_gradients(s_batch, a_outs)
                actor4.train(s_batch, a_grads[0])

                # Update target networks
                actor4.update_target_network()
                critic4.update_target_network()

            s = s2
            episode_reward += r

            if terminal or j == MAX_STEPS_EPISODE - 1:
                # NOTE(review): indexing episode_reward assumes env.step
                # returns the reward as an array -- confirm.
                summary_str = sess.run(summary_ops,
                                       feed_dict={
                                           summary_vars[0]: episode_reward[0],
                                           summary_vars[1]: episode_ave_max_q
                                       })

                writer.add_summary(summary_str, i)
                writer.flush()

                # j + 1 steps were taken in this episode; dividing by j
                # raised ZeroDivisionError when it ended on its first step.
                steps_taken = float(j + 1)
                print(' '.join([
                    'Reward: %.2i' % int(episode_reward),
                    ' | Episode',
                    str(i),
                    '| Qmax: %.4f' % (episode_ave_max_q / steps_taken),
                ]))

                REWARD.append(episode_reward)
                QMAX.append(episode_ave_max_q)

                break
Exemplo n.º 56
0
def main(empresa):
    webhoseio.config(token="90a4a1a8-5016-4023-bdd6-8b302321a632")

    #Cargar lista de empresas a buscar
    salida = pd.DataFrame()

    empresa=empresa.replace(", Inc.","")
    empresa=empresa.replace(", Inc","")
    empresa=empresa.replace("Inc.","")
    empresa=empresa.replace("Inc","")
    query_params = {
    "q": "\"" + empresa + "\" site_type:news language:english (site:cnn.com OR site:wsj.com OR site:forbes.com OR site:marketwatch.com OR site:thestreet.com OR site:thisismoney.co.uk OR site:kiplinger.com site:bloomberg.com OR site:highpointobserver.com)",
    "ts": str(round(dt(dt.now().year,dt.now().month,dt.now().day,0,0).timestamp())),
    "sort": "relevancy"
    }
    output = webhoseio.query("filterWebContent", query_params)
    output = pd.DataFrame(output['posts'])
    if output.shape[0] > 0:
        for i in range(output.shape[0]):
            output.loc[i,'text'] = output.loc[i,'text'].replace('\n','')
            output.loc[i,'published'] = output.loc[i,'published'][0:10]
        salida = salida.append(output[['published','text']],ignore_index = True)

    reviews = salida["text"]
    labels = pd.Series(np.zeros(len(reviews)))
    # In[3]:

    # with open('news.txt', 'r') as f:
    #     reviews = f.read()
    # with open('results.txt', 'r') as f:
    #     labels = f.read()


    # In[4]:

    reviews[:2000]


    # ## Data preprocessing
    #
    # The first step when building a neural network model is getting your data into the proper form to feed into the network. Since we're using embedding layers, we'll need to encode each word with an integer. We'll also want to clean it up a bit.
    #
    # You can see an example of the reviews data above. We'll want to get rid of those periods. Also, you might notice that the reviews are delimited with newlines `\n`. To deal with those, I'm going to split the text into each review using `\n` as the delimiter. Then I can combine all the reviews back together into one big string.
    #
    # First, let's remove all punctuation. Then get all the text without the newlines and split it into individual words.

    # In[5]:

    from string import punctuation
    all_text = ''.join([c for c in reviews if c not in punctuation])
    all_text=''.join([i for i in all_text if not i.isnumeric()])
    reviews = all_text.split('\n')

    all_text = ' '.join(reviews)
    words = all_text.split()


    # In[6]:

    all_text[:2000]


    # In[7]:

    words[10000:10100]

    # ### Encoding the words
    #
    # The embedding lookup requires that we pass in integers to our network. The easiest way to do this is to create dictionaries that map the words in the vocabulary to integers. Then we can convert each of our reviews into integers so they can be passed into the network.
    #
    # > **Exercise:** Now you're going to encode the words with integers. Build a dictionary that maps words to integers. Later we're going to pad our input vectors with zeros, so make sure the integers **start at 1, not 0**.
    # > Also, convert the reviews to integers and store the reviews in a new list called `reviews_ints`.

    # In[8]:

    from collections import Counter
    counts = Counter(words)
    vocab = sorted(counts, key=counts.get, reverse=True)
    vocab_to_int = {word: ii for ii, word in enumerate(vocab, 1)}

    reviews_ints = []
    for each in reviews:
        reviews_ints.append([vocab_to_int[word] for word in each.split()])


    # ### Encoding the labels
    #
    # Our labels are "positive" or "negative". To use these labels in our network, we need to convert them to 0 and 1.
    #
    # > **Exercise:** Convert labels from `positive` and `negative` to 1 and 0, respectively.

    # In[9]:

    # labels = labels.split('\n')
    # labels = np.array([1 if each == 1 else 0 for each in labels])


    # In[10]:

    review_lens = Counter([len(x) for x in reviews_ints])

    # Okay, a couple issues here. We seem to have one review with zero length. And, the maximum review length is way too many steps for our RNN. Let's truncate to 200 steps. For reviews shorter than 200, we'll pad with 0s. For reviews longer than 200, we can truncate them to the first 200 characters.
    #
    # > **Exercise:** First, remove the review with zero length from the `reviews_ints` list.

    # In[11]:

    non_zero_idx = [ii for ii, review in enumerate(reviews_ints) if len(review) != 0]
    len(non_zero_idx)


    # In[12]:

    reviews_ints[-1]


    # Turns out it's the final review that has zero length. But that might not always be the case, so let's make it more general.

    # In[13]:

    reviews_ints = [reviews_ints[ii] for ii in non_zero_idx]
    labels = np.array([labels[ii] for ii in non_zero_idx])

    # > **Exercise:** Now, create an array `features` that contains the data we'll pass to the network. The data should come from `reviews_ints`, since we want to feed integers to the network. Each row should be 200 elements long. For reviews shorter than 200 words, left pad with 0s. That is, if the review is `['best', 'movie', 'ever']`, `[117, 18, 128]` as integers, the row will look like `[0, 0, 0, ..., 0, 117, 18, 128]`. For reviews longer than 200, use only the first 200 words as the feature vector.
    #
    # This isn't trivial and there are a bunch of ways to do this. But, if you're going to be building your own deep learning networks, you're going to have to get used to preparing your data.
    #
    #

    # In[14]:

    seq_len = 200
    features = np.zeros((len(reviews_ints), seq_len), dtype=int)
    for i, row in enumerate(reviews_ints):
        features[i, -len(row):] = np.array(row)[:seq_len]


    # In[15]:

    features[:1,:200]


    # ## Training, Validation, Test
    #
    #

    # With our data in nice shape, we'll split it into training, validation, and test sets.
    #
    # > **Exercise:** Create the training, validation, and test sets here. You'll need to create sets for the features and the labels, `train_x` and `train_y` for example. Define a split fraction, `split_frac` as the fraction of data to keep in the training set. Usually this is set to 0.8 or 0.9. The rest of the data will be split in half to create the validation and testing data.

    # In[16]:

    split_frac = 0.8
    split_idx = int(len(features)*0.8)
    train_x, val_x = features[:split_idx], features[split_idx:]
    train_y, val_y = labels[:split_idx], labels[split_idx:]

    test_idx = int(len(val_x)*0.5)
    val_x, test_x = val_x[:test_idx], val_x[test_idx:]
    val_y, test_y = val_y[:test_idx], val_y[test_idx:]

    #

    # ## Build the graph
    #
    # Here, we'll build the graph. First up, defining the hyperparameters.
    #
    # * `lstm_size`: Number of units in the hidden layers in the LSTM cells. Usually larger is better performance wise. Common values are 128, 256, 512, etc.
    # * `lstm_layers`: Number of LSTM layers in the network. I'd start with 1, then add more if I'm underfitting.
    # * `batch_size`: The number of reviews to feed the network in one training pass. Typically this should be set as high as you can go without running out of memory.
    # * `learning_rate`: Learning rate

    # In[17]:

    lstm_size = 256
    lstm_layers = 1
    batch_size = 1
    learning_rate = 0.001


    # For the network itself, we'll be passing in our 200 element long review vectors. Each batch will be `batch_size` vectors. We'll also be using dropout on the LSTM layer, so we'll make a placeholder for the keep probability.

    # > **Exercise:** Create the `inputs_`, `labels_`, and drop out `keep_prob` placeholders using `tf.placeholder`. `labels_` needs to be two-dimensional to work with some functions later.  Since `keep_prob` is a scalar (a 0-dimensional tensor), you shouldn't provide a size to `tf.placeholder`.

    # In[18]:

    n_words = len(vocab_to_int)

    # Create the graph object
    graph = tf.Graph()
    # Add nodes to the graph
    with graph.as_default():
        inputs_ = tf.placeholder(tf.int32, [None, None], name='inputs')
        labels_ = tf.placeholder(tf.int32, [None, None], name='labels')
        keep_prob = tf.placeholder(tf.float32, name='keep_prob')


    # ### Embedding
    #
    # Now we'll add an embedding layer. We need to do this because there are 74000 words in our vocabulary. It is massively inefficient to one-hot encode our classes here. You should remember dealing with this problem from the word2vec lesson. Instead of one-hot encoding, we can have an embedding layer and use that layer as a lookup table. You could train an embedding layer using word2vec, then load it here. But, it's fine to just make a new layer and let the network learn the weights.
    #
    # > **Exercise:** Create the embedding lookup matrix as a `tf.Variable`. Use that embedding matrix to get the embedded vectors to pass to the LSTM cell with [`tf.nn.embedding_lookup`](https://www.tensorflow.org/api_docs/python/tf/nn/embedding_lookup). This function takes the embedding matrix and an input tensor, such as the review vectors. Then, it'll return another tensor with the embedded vectors. So, if the embedding layer has 200 units, the function will return a tensor with size [batch_size, 200].
    #
    #

    # In[19]:

    # Size of the embedding vectors (number of units in the embedding layer)
    embed_size = 300

    with graph.as_default():
        embedding = tf.Variable(tf.random_uniform((n_words, embed_size), -1, 1))
        embed = tf.nn.embedding_lookup(embedding, inputs_)


    # ### LSTM cell
    #
    # <img src="assets/network_diagram.png" width=400px>
    #
    # Next, we'll create our LSTM cells to use in the recurrent network ([TensorFlow documentation](https://www.tensorflow.org/api_docs/python/tf/contrib/rnn)). Here we are just defining what the cells look like. This isn't actually building the graph, just defining the type of cells we want in our graph.
    #
    # To create a basic LSTM cell for the graph, you'll want to use `tf.contrib.rnn.BasicLSTMCell`. Looking at the function documentation:
    #
    # ```
    # tf.contrib.rnn.BasicLSTMCell(num_units, forget_bias=1.0, input_size=None, state_is_tuple=True, activation=<function tanh at 0x109f1ef28>)
    # ```
    #
    # you can see it takes a parameter called `num_units`, the number of units in the cell, called `lstm_size` in this code. So then, you can write something like
    #
    # ```
    # lstm = tf.contrib.rnn.BasicLSTMCell(num_units)
    # ```
    #
    # to create an LSTM cell with `num_units`. Next, you can add dropout to the cell with `tf.contrib.rnn.DropoutWrapper`. This just wraps the cell in another cell, but with dropout added to the inputs and/or outputs. It's a really convenient way to make your network better with almost no effort! So you'd do something like
    #
    # ```
    # drop = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=keep_prob)
    # ```
    #
    # Most of the time, your network will have better performance with more layers. That's sort of the magic of deep learning: adding more layers allows the network to learn really complex relationships. Again, there is a simple way to create multiple layers of LSTM cells with `tf.contrib.rnn.MultiRNNCell`:
    #
    # ```
    # cell = tf.contrib.rnn.MultiRNNCell([drop] * lstm_layers)
    # ```
    #
    # Here, `[drop] * lstm_layers` creates a list of cells (`drop`) that is `lstm_layers` long. The `MultiRNNCell` wrapper builds this into multiple layers of RNN cells, one for each cell in the list.
    #
    # So the final cell you're using in the network is actually multiple (or just one) LSTM cells with dropout. But it all works the same from an architectural viewpoint, just a more complicated graph in the cell.
    #
    # > **Exercise:** Below, use `tf.contrib.rnn.BasicLSTMCell` to create an LSTM cell. Then, add drop out to it with `tf.contrib.rnn.DropoutWrapper`. Finally, create multiple LSTM layers with `tf.contrib.rnn.MultiRNNCell`.
    #
    # Here is [a tutorial on building RNNs](https://www.tensorflow.org/tutorials/recurrent) that will help you out.
    #

    # In[20]:

    with graph.as_default():
        # Your basic LSTM cell
        lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)

        # Add dropout to the cell
        drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)

        # Stack up multiple LSTM layers, for deep learning
        cell = tf.contrib.rnn.MultiRNNCell([drop] * lstm_layers)

        # Getting an initial state of all zeros
        initial_state = cell.zero_state(batch_size, tf.float32)


    # ### RNN forward pass
    #
    # <img src="assets/network_diagram.png" width=400px>
    #
    # Now we need to actually run the data through the RNN nodes. You can use [`tf.nn.dynamic_rnn`](https://www.tensorflow.org/api_docs/python/tf/nn/dynamic_rnn) to do this. You'd pass in the RNN cell you created (our multiple layered LSTM `cell` for instance), and the inputs to the network.
    #
    # ```
    # outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, initial_state=initial_state)
    # ```
    #
    # Above I created an initial state, `initial_state`, to pass to the RNN. This is the cell state that is passed between the hidden layers in successive time steps. `tf.nn.dynamic_rnn` takes care of most of the work for us. We pass in our cell and the input to the cell, then it does the unrolling and everything else for us. It returns outputs for each time step and the final_state of the hidden layer.
    #
    # > **Exercise:** Use `tf.nn.dynamic_rnn` to add the forward pass through the RNN. Remember that we're actually passing in vectors from the embedding layer, `embed`.
    #
    #

    # In[21]:

    with graph.as_default():
        outputs, final_state = tf.nn.dynamic_rnn(cell, embed,
                                                 initial_state=initial_state)


    # ### Output
    #
    # We only care about the final output; we'll be using that as our sentiment prediction. So we need to grab the last output with `outputs[:, -1]`, then calculate the cost from that and `labels_`.

    # In[22]:

    with graph.as_default():
        predictions = tf.contrib.layers.fully_connected(outputs[:, -1], 1, activation_fn=tf.sigmoid)
        cost = tf.losses.mean_squared_error(labels_, predictions)

        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)


    # ### Validation accuracy
    #
    # Here we can add a few nodes to calculate the accuracy which we'll use in the validation pass.

    # In[23]:

    with graph.as_default():
        correct_pred = tf.equal(tf.cast(tf.round(predictions), tf.int32), labels_)
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))


    # ### Batching
    #
    # This is a simple function for returning batches from our data. First it removes data such that we only have full batches. Then it iterates through the `x` and `y` arrays and returns slices out of those arrays with size `[batch_size]`.

    # In[24]:

    def get_batches(x, y, batch_size=1):
        """Yield aligned ``(x, y)`` slices of ``batch_size`` items each.

        Trailing items that do not fill a complete batch are dropped, so
        every yielded slice of ``x`` has exactly ``batch_size`` entries.
        """
        # Largest multiple of batch_size that fits into x.
        usable = (len(x) // batch_size) * batch_size
        x = x[:usable]
        y = y[:usable]
        for start in range(0, len(x), batch_size):
            stop = start + batch_size
            yield x[start:stop], y[start:stop]


    # ## Training
    #
    # Below is the typical training code. If you want to do this yourself, feel free to delete all this code and implement it yourself. Before you run this, make sure the `checkpoints` directory exists.

    # In[28]:

    epochs = 1
    with graph.as_default():
        saver = tf.train.Saver()

    with tf.Session(graph=graph) as sess:
        sess.run(tf.global_variables_initializer())
        iteration = 1
        for e in range(epochs):
            state = sess.run(initial_state)

            for ii, (x, y) in enumerate(get_batches(train_x, train_y, batch_size), 1):
                feed = {inputs_: x,
                        labels_: y[:, None],
                        keep_prob: 0.5,
                        initial_state: state}
                loss, state, _ = sess.run([cost, final_state, optimizer], feed_dict=feed)

                if iteration%25==0:
                    val_acc = []
                    val_state = sess.run(cell.zero_state(batch_size, tf.float32))
                    for x, y in get_batches(val_x, val_y, batch_size):
                        feed = {inputs_: x,
                                labels_: y[:, None],
                                keep_prob: 1,
                                initial_state: val_state}
                        batch_acc, val_state, pred = sess.run([accuracy, final_state, predictions], feed_dict=feed)
                        val_acc.append(batch_acc)
                iteration +=1
                break
        saver.save(sess, "checkpoints/sentiment.ckpt")



    # ## Testing

    # In[30]:

    test_acc = []
    with tf.Session(graph=graph) as sess:
        saver.restore(sess, tf.train.latest_checkpoint('checkpoints'))
        test_state = sess.run(cell.zero_state(batch_size, tf.float32))
        for ii, (x, y) in enumerate(get_batches(test_x, test_y, batch_size), 1):
            feed = {inputs_: x,
                    labels_: y[:, None],
                    keep_prob: 1,
                    initial_state: test_state}
            batch_acc, test_state, pred = sess.run([accuracy, final_state, predictions], feed_dict=feed)
            test_acc.append(batch_acc)

    pred=pred[0][0]

    if pred > 0.5:
        consejo = 'up'
    else:
        consejo = 'down'

    return consejo
Exemplo n.º 57
0
# Script fragment: fits the line y = a * x + b with gradient descent, feeding
# one sample per step. `a`, `b`, `x_in`, `x`, `y` and `num_pts` are used here
# but defined before this fragment.

# y_pred is the predicted y-value from our graph
y_pred = a * x_in + b

# y_act is a placeholder for the actual y-value
# associated with x_in
y_act = tf.placeholder(tf.float32, [None])

# Define our "loss" function, which is the same
# squared-difference as before. Recall that we do
# this instead of absolute value because it's
# easier to differentiate.
squared_diff = tf.square(y_pred - y_act)

# Create an SGD optimizer (built-in to TensorFlow) with a fixed
# learning rate of 0.001 that minimizes the squared difference.
train_step = tf.train.GradientDescentOptimizer(0.001).minimize(squared_diff)

# Set up the TensorFlow session and initialize all variables.
# NOTE(review): the session is never closed within this fragment.
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# This is the train loop. Here we pick a point, and
# direct TensorFlow to train the graph off that point.
# `i % num_pts` walks through the samples in order and wraps around,
# so each of the 10000 steps trains on exactly one (x, y) pair.
for i in range(10000):
    xp, yp = x[i % num_pts], y[i % num_pts]
    sess.run(train_step, feed_dict={x_in: [xp], y_act: [yp]})

# Grab the final values for a and b after training
# and print them for the user.
trained_a = sess.run(a)
trained_b = sess.run(b)
print('Trained Values: a = {}, b = {}'.format(trained_a, trained_b))
Exemplo n.º 58
0
def optimize_graph(logger=None, verbose=False):
    """Freeze a BERT encoder with a masked mean-pooling head into a graph file.

    Settings are read from the module-level ``args`` object (``output_dir``,
    ``config_name``, ``ckpt_name``, ``max_seq_len`` and ``layer_indexes``).
    The function builds the model, converts its variables to constants, runs
    ``optimize_for_inference`` on the result and serializes the GraphDef to a
    uniquely named file inside ``args.output_dir``.

    :param logger: object used for progress/error reporting; when falsy a new
        one is created through ``set_logger``.
    :param verbose: forwarded to ``set_logger`` when a logger is created here.
    :return: path of the written graph file, or ``None`` if any step raised
        (the exception is logged instead of propagated).
    """
    if not logger:
        logger = set_logger(colored('BERT_VEC', 'yellow'), verbose)
    try:
        # we don't need GPU for optimizing the graph
        from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference
        tf.gfile.MakeDirs(args.output_dir)

        config_fp = args.config_name
        logger.info('model config: %s' % config_fp)

        # Load the BERT configuration file.
        with tf.gfile.GFile(config_fp, 'r') as f:
            bert_config = modeling.BertConfig.from_dict(json.load(f))

        logger.info('build graph...')
        # input placeholders, not sure if they are friendly to XLA
        input_ids = tf.placeholder(tf.int32, (None, args.max_seq_len), 'input_ids')
        input_mask = tf.placeholder(tf.int32, (None, args.max_seq_len), 'input_mask')
        input_type_ids = tf.placeholder(tf.int32, (None, args.max_seq_len), 'input_type_ids')

        jit_scope = tf.contrib.compiler.jit.experimental_jit_scope

        with jit_scope():
            # Captured before `input_mask` is rebound to its float cast below,
            # so this list keeps referring to the original int32 placeholders.
            input_tensors = [input_ids, input_mask, input_type_ids]

            model = modeling.BertModel(
                config=bert_config,
                is_training=False,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=input_type_ids,
                use_one_hot_embeddings=False)

            # Collect all trainable variables so they can be mapped onto the checkpoint.
            tvars = tf.trainable_variables()

            init_checkpoint = args.ckpt_name
            (assignment_map, initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(
                tvars, init_checkpoint)

            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

            # Pick the encoder output(s) that feed the pooling step.
            with tf.variable_scope("pooling"):
                if len(args.layer_indexes) == 1:
                    # A single requested layer is used as-is.
                    encoder_layer = model.all_encoder_layers[args.layer_indexes[0]]
                else:
                    # Several requested layers are concatenated along the last
                    # axis (the original note gives the width as 768 * number
                    # of layers).
                    all_layers = [model.all_encoder_layers[l] for l in args.layer_indexes]
                    encoder_layer = tf.concat(all_layers, -1)

            def mul_mask(x, m):
                """Multiply `x` by mask `m`, broadcasting `m` over a new last axis."""
                return x * tf.expand_dims(m, axis=-1)

            def masked_reduce_mean(x, m):
                """Mask-weighted mean of `x` over axis 1; the epsilon guards an all-zero mask."""
                return tf.reduce_sum(mul_mask(x, m), axis=1) / (
                    tf.reduce_sum(m, axis=1, keepdims=True) + 1e-10)

            input_mask = tf.cast(input_mask, tf.float32)
            # Sentence vector: average the encoder output over axis 1, weighting
            # each position by `input_mask` so that masked-out positions do not
            # contribute.
            pooled = masked_reduce_mean(encoder_layer, input_mask)
            pooled = tf.identity(pooled, 'final_encodes')

            output_tensors = [pooled]
            tmp_g = tf.get_default_graph().as_graph_def()

        # Node names are taken from the producing op rather than by slicing the
        # ":<index>" suffix off the tensor name, which only works for a
        # single-digit output index.
        input_names = [n.op.name for n in input_tensors]
        output_names = [n.op.name for n in output_tensors]

        # allow_soft_placement: let TensorFlow choose the device automatically.
        config = tf.ConfigProto(allow_soft_placement=True)
        with tf.Session(config=config) as sess:
            logger.info('load parameters from checkpoint...')
            sess.run(tf.global_variables_initializer())
            logger.info('freeze...')
            tmp_g = tf.graph_util.convert_variables_to_constants(sess, tmp_g, output_names)
            dtypes = [n.dtype for n in input_tensors]
            logger.info('optimize...')
            tmp_g = optimize_for_inference(
                tmp_g,
                input_names,
                output_names,
                [dtype.as_datatype_enum for dtype in dtypes],
                False)

        # Reserve a unique file name inside the output directory. The handle is
        # closed immediately (delete=False keeps the file on disk) so that no
        # descriptor is leaked and the file can be reopened for binary writing.
        with tempfile.NamedTemporaryFile('w', delete=False, dir=args.output_dir) as tmp:
            tmp_file = tmp.name
        logger.info('write graph to a tmp file: %s' % tmp_file)
        with tf.gfile.GFile(tmp_file, 'wb') as f:
            f.write(tmp_g.SerializeToString())
        return tmp_file
    except Exception as e:
        # Best-effort by design: report the failure and signal it with None.
        logger.error('fail to optimize the graph!')
        logger.error(e)
        return None
Exemplo n.º 59
0
def train(x):
    """
    Trains the neural net
    :param x: Features placeholder
    :return: Trained neural net
    """
    prediction = convNeuralNet(x)
    #print prediction
    with tf.name_scope('cross_entropy'):
        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction,labels=y))
        tf.summary.scalar('cross_entropy',cost)

    with tf.name_scope('train'):
        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost) # learning rate = 0.001

    with tf.name_scope('accuracy'):
        correct = tf.equal(tf.argmax(prediction,1),tf.argmax(y,1))
        accuracy = tf.reduce_mean(tf.cast(correct,'float'))
        tf.summary.scalar('accuracy',accuracy)

    # cycles of feed forward and backprop
    num_epochs = ne

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        merged_summary = tf.summary.merge_all()
        writer = tf.summary.FileWriter(os.getcwd()+tb_path)
        writer.add_graph(sess.graph)
        for epoch in range(num_epochs):
            epoch_loss = 0
            for i in range(int(real_X_9.shape[0])/batch_size):#mnist.train.num_examples/batch_size)): # X.shape[0]
                randidx = np.random.choice(real_X_9.shape[0], batch_size, replace=False)
                epoch_x,epoch_y = real_X_9[randidx,:],real_y_9[randidx,:] #mnist.train.next_batch(batch_size) # X,y
                j,c = sess.run([optimizer,cost],feed_dict={x:epoch_x,y:epoch_y,keep_prob:TRAIN_KEEP_PROB})
                if i == 0:
                    [ta] = sess.run([accuracy],feed_dict={x:epoch_x,y:epoch_y,keep_prob:TRAIN_KEEP_PROB})
                    print 'Train Accuracy', ta
                if epoch % 50 == 0 and i == 0:
                    #saver.save(sess,os.getcwd()+'/models/base/baseCNN18.ckpt')
                    #print 'Checkpoint saved at',os.getcwd()+'/models/base/baseCNN18'
                    pass
                    # ta_list.append(ta)
                if i % 5 == 0:
                    s = sess.run(merged_summary,feed_dict={x:epoch_x,y:epoch_y,keep_prob:TRAIN_KEEP_PROB})
                    writer.add_summary(s,i)

                epoch_loss += c
            print '\n','Epoch', epoch + 1, 'completed out of', num_epochs, '\nLoss:',epoch_loss

        saver.save(sess, os.getcwd()+'/models/base/' + NAME)
        saver.export_meta_graph(os.getcwd()+'/models/base/' + NAME + '.meta')
        print 'Model saved'

        print '\n','Train Accuracy', accuracy.eval(feed_dict={x:real_X_9, y:real_y_9, keep_prob:TRAIN_KEEP_PROB})
        print '\n','Test Accuracy', accuracy.eval(feed_dict={x:test_real_X, y:test_real_y, keep_prob:1.0}) #X, y #mnist.test.images, mnist.test.labels

        #saver.save(sess,'baseDNN',global_step=1000)

        #print 'Prediction',sess.run(prediction, feed_dict={x:testtest, keep_prob:1})
        #print 'Prediction',sess.run(tf.argmax(prediction,1), feed_dict={x:testtest, keep_prob:1})
        #print test_real_y
        # correct_list = []
        # for i in range(len(sess.run(tf.argmax(prediction,1), feed_dict={x:testtest, keep_prob:1}))):
        #     if list(test_real_y[i]).index(1) == sess.run(tf.argmax(prediction,1), feed_dict={x:testtest, keep_prob:1})[i]:
        #         correct_list.append(True)
        #     else:
        #         correct_list.append(False)
        # print correct_list

        '''
def _conv_tanh(inputs, in_channels, out_channels, name):
    """Apply a 1x3 SAME convolution plus bias followed by tanh inside name scope `name`."""
    with tf.name_scope(name) as scope:
        kernel = tf.Variable(tf.truncated_normal([1, 3, in_channels, out_channels],
                                                 dtype=tf.float32,
                                                 stddev=1e-1), name='weights')
        conv = tf.nn.conv2d(inputs, kernel, [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[out_channels], dtype=tf.float32),
                             trainable=True, name='biases')
        out = tf.nn.bias_add(conv, biases)
        return tf.nn.tanh(out, name=scope)


def _max_pool_1x2(inputs, name):
    """Max-pool with a 1x2 window and stride 2 along axis 2 (SAME padding)."""
    return tf.nn.max_pool(inputs,
                          ksize=[1, 1, 2, 1],
                          strides=[1, 1, 2, 1],
                          padding='SAME',
                          name=name)


def mode_base():
    """Build a VGG-16-style stack of 1x3 convolutions with a dense classifier head.

    Five convolution stages (2, 2, 3, 3 and 3 tanh conv layers with 64, 128,
    256, 512 and 512 output channels) are each followed by a max-pool; the
    result is flattened and passed through six ``add_layer`` layers ending in
    3 outputs. The loss is the mean softmax cross-entropy and is minimized by
    plain gradient descent with a learning rate of 0.001.

    A new ``tf.Session`` is created and all variables are initialized in it;
    the session is handed back to the caller and is not closed here.

    :return: tuple ``(x_input, y_lable, prediction, loss, train_step, sess)``
    """
    x_input = tf.placeholder(tf.float32, shape=(None, 1, 128, 1))
    # Number of classes is not final; 3 for the time being.
    y_lable = tf.placeholder(tf.float32, shape=(None, 3))

    # (number of conv layers, output channels) for each stage.
    stages = ((2, 64), (2, 128), (3, 256), (3, 512), (3, 512))

    net = x_input
    in_channels = 1
    for stage_no, (n_convs, out_channels) in enumerate(stages, 1):
        for conv_no in range(1, n_convs + 1):
            net = _conv_tanh(net, in_channels, out_channels,
                             'conv%d_%d' % (stage_no, conv_no))
            in_channels = out_channels
        # Every stage gets its own pool name; the last pool used to be
        # registered as 'pool4' a second time.
        net = _max_pool_1x2(net, 'pool%d' % stage_no)
    pool5 = net

    # Flatten everything except the batch axis for the dense layers.
    shape = int(np.prod(pool5.get_shape()[1:]))
    pool5_flat = tf.reshape(pool5, [-1, shape])

    hiddenLayer1 = add_layer("layer1", pool5_flat, in_size=shape, out_size=2048, activation_function=tf.tanh)
    hiddenLayer2 = add_layer("layer2", hiddenLayer1, in_size=2048, out_size=1024, activation_function=tf.tanh)
    hiddenLayer3 = add_layer("layer3", hiddenLayer2, in_size=1024, out_size=512, activation_function=tf.tanh)
    hiddenLayer4 = add_layer("layer4", hiddenLayer3, in_size=512, out_size=128, activation_function=tf.tanh)
    hiddenLayer5 = add_layer("layer5", hiddenLayer4, in_size=128, out_size=16, activation_function=tf.tanh)

    # No activation is passed for the final layer; its output is fed to the
    # loss as logits.
    prediction = add_layer("end", hiddenLayer5, in_size=16, out_size=3)

    losses = tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y_lable)
    loss = tf.reduce_mean(losses)

    train_step = tf.train.GradientDescentOptimizer(0.001).minimize(loss)

    init = tf.global_variables_initializer()

    sess = tf.Session()
    sess.run(init)
    return x_input, y_lable, prediction, loss, train_step, sess