Example No. 1
def train(images, labels, ckpt_path, dropout=False):
  """
  This function contains the loop that actually trains the model.
  :param images: a numpy array with the input data
  :param labels: a numpy array with the output labels
  :param ckpt_path: a path (including name) where model checkpoints are saved
  :param dropout: Boolean, whether to use dropout or not
  :return: True if everything went well
  """

  # Check training data
  assert len(images) == len(labels)
  assert images.dtype == np.float32
  assert labels.dtype == np.int32

  # Set default TF graph
  with tf.Graph().as_default():
    global_step = tf.Variable(0, trainable=False)

    # Declare data placeholder
    train_data_node = _input_placeholder()

    # Create a placeholder to hold labels
    train_labels_shape = (FLAGS.batch_size,)
    train_labels_node = tf.placeholder(tf.int32, shape=train_labels_shape)

    print("Done Initializing Training Placeholders")

    # Build a Graph that computes the logits predictions from the placeholder
    if FLAGS.deeper:
      logits = inference_deeper(train_data_node, dropout=dropout)
    else:
      logits = inference(train_data_node, dropout=dropout)

    # Calculate loss
    loss = loss_fun(logits, train_labels_node)

    # Build a Graph that trains the model with one batch of examples and
    # updates the model parameters.
    train_op = train_op_fun(loss, global_step)

    # Create a saver.
    saver = tf.train.Saver(tf.global_variables())

    print("Graph constructed and saver created")

    # Build an initialization operation to run below.
    init = tf.global_variables_initializer()

    # Create and init sessions
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=FLAGS.log_device_placement)) #NOLINT(long-line)
    sess.run(init)

    print("Session ready, beginning training loop")

    # Initialize the number of batches
    data_length = len(images)
    nb_batches = math.ceil(data_length / FLAGS.batch_size)

    for step in xrange(FLAGS.max_steps):
      # for debug, save start time
      start_time = time.time()

      # Current batch number
      batch_nb = step % nb_batches

      # Current batch start and end indices
      start, end = utils.batch_indices(batch_nb, data_length, FLAGS.batch_size)

      # Prepare dictionary to feed the session with
      feed_dict = {train_data_node: images[range(start, end)],
                   train_labels_node: labels[range(start, end)]}

      # Run training step
      _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

      # Compute duration of training step
      duration = time.time() - start_time

      # Sanity check
      assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

      # Echo loss once in a while
      if step % 100 == 0:
        num_examples_per_step = FLAGS.batch_size
        examples_per_sec = num_examples_per_step / duration
        sec_per_batch = float(duration)

        format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                      'sec/batch)')
        print (format_str % (datetime.now(), step, loss_value,
                             examples_per_sec, sec_per_batch))

      # Save the model checkpoint periodically.
      if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
        saver.save(sess, ckpt_path, global_step=step)

  return True
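A minimal sketch of how this train() function might be invoked, assuming the module-level FLAGS and the helpers it references (_input_placeholder, inference, inference_deeper, loss_fun, train_op_fun, utils.batch_indices) are defined elsewhere in the module; the array shapes and checkpoint path below are illustrative only.

import numpy as np

# Hypothetical driver code (not part of the original example).
images = np.random.rand(5000, 32, 32, 3).astype(np.float32)   # dummy inputs
labels = np.random.randint(0, 10, size=5000).astype(np.int32)  # dummy labels
assert train(images, labels, '/tmp/train_dir/model.ckpt', dropout=True)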
Example No. 2
def _bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)
Example No. 3
def weight_variable(shape, nm):
    # function to initialize weights
    initial = tf.truncated_normal(shape, stddev=0.1)
    tf.summary.histogram(nm, initial, collections=['always'])
    return tf.Variable(initial, name=nm)
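Helpers like weight_variable and _bias_variable are typically combined into layer-building code. A minimal sketch of a fully connected layer built from them (an assumption: the fc_layer name is ours, and the _bias_variable helper from Example No. 2 is taken to be in scope):

import tensorflow.compat.v1 as tf

def fc_layer(inputs, out_dim, name):
    # Hypothetical helper: a fully connected layer with ReLU activation,
    # built from the initializer helpers defined above.
    in_dim = int(inputs.get_shape()[-1])
    w = weight_variable([in_dim, out_dim], name + '_w')
    b = _bias_variable([out_dim])
    return tf.nn.relu(tf.matmul(inputs, w) + b, name=name)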
Example No. 4
Justin Kahr
'''


import time

import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()
print(tf.__version__)


XORin = [[0,0], [0,1], [1,0], [1,1]]
XORout = [[0], [1], [1], [0]]

x = tf.placeholder(tf.float32, shape=[4,2])
y = tf.placeholder(tf.float32, shape=[4,1])

# weights
w1 = tf.Variable([[1.0, 0.0],[1.0, 0.0]], shape=[2,2])
w2 = tf.Variable([[0.0], [1.0]], shape=[2,1])

# biases
b1 = tf.Variable([0.0, 0.0], shape=[2])
b2 = tf.Variable([0.0], shape=1)

# forward and back propagation
classification = tf.sigmoid(tf.matmul(tf.sigmoid(tf.matmul(x, w1) + b1), w2) + b2)

# error
e = tf.reduce_mean(tf.squared_difference(y, classification))
train = tf.train.GradientDescentOptimizer(0.1).minimize(e)
 
trainTime = time.time()
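The example ends before a session is created; a minimal sketch of how the training loop for this XOR graph would typically continue (an assumption, not the original author's code):

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Train on the full 4-row XOR truth table each step.
    for step in range(5000):
        _, err = sess.run([train, e], feed_dict={x: XORin, y: XORout})
        if step % 1000 == 0:
            print('step %d, error %.4f' % (step, err))
    print('training took %.1f seconds' % (time.time() - trainTime))
    print(sess.run(classification, feed_dict={x: XORin}))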
Example No. 5
import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()
# Initialize two variables; their shapes must match the ones stored in model.ckpt
v1 = tf.Variable([11, 12, 13], dtype=tf.float32, name='v1')
v2 = tf.Variable([15, 16], dtype=tf.float32, name='v2')
# Create a tf.train.Saver instance
saver = tf.train.Saver()
with tf.Session() as sess:
    # Restore the variables from ./L2model/model.ckpt
    saver.restore(sess, './L2model/model.ckpt')
    # Print the values of the two variables
    print(sess.run(v1))
    print(sess.run(v2))
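For reference, a minimal sketch of the companion script that would have produced ./L2model/model.ckpt (an assumption inferred from the variable names and shapes restored above):

import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()
# Create two variables with the same names and shapes as in the restore script.
v1 = tf.Variable([11, 12, 13], dtype=tf.float32, name='v1')
v2 = tf.Variable([15, 16], dtype=tf.float32, name='v2')
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.save(sess, './L2model/model.ckpt')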
Example No. 6
def train_loop(pipeline_config_path,
               model_dir,
               config_override=None,
               train_steps=None,
               use_tpu=False,
               save_final_config=False,
               checkpoint_every_n=1000,
               checkpoint_max_to_keep=7,
               record_summaries=True,
               **kwargs):
    """Trains a model using eager + functions.

  This method:
    1. Processes the pipeline configs
    2. (Optionally) saves the as-run config
    3. Builds the model & optimizer
    4. Gets the training input data
    5. Loads a fine-tuning detection or classification checkpoint if requested
    6. Loops over the train data, executing distributed training steps inside
       tf.functions.
    7. Checkpoints the model every `checkpoint_every_n` training steps.
    8. Logs the training metrics as TensorBoard summaries.

  Args:
    pipeline_config_path: A path to a pipeline config file.
    model_dir:
      The directory to save checkpoints and summaries to.
    config_override: A pipeline_pb2.TrainEvalPipelineConfig text proto to
      override the config from `pipeline_config_path`.
    train_steps: Number of training steps. If None, the number of training steps
      is set from the `TrainConfig` proto.
    use_tpu: Boolean, whether training and evaluation should run on TPU.
    save_final_config: Whether to save final config (obtained after applying
      overrides) to `model_dir`.
    checkpoint_every_n:
      Checkpoint every n training steps.
    checkpoint_max_to_keep:
      int, the number of most recent checkpoints to keep in the model directory.
    record_summaries: Boolean, whether or not to record summaries.
    **kwargs: Additional keyword arguments for configuration override.
  """
    ## Parse the configs
    get_configs_from_pipeline_file = MODEL_BUILD_UTIL_MAP[
        'get_configs_from_pipeline_file']
    merge_external_params_with_configs = MODEL_BUILD_UTIL_MAP[
        'merge_external_params_with_configs']
    create_pipeline_proto_from_configs = MODEL_BUILD_UTIL_MAP[
        'create_pipeline_proto_from_configs']

    configs = get_configs_from_pipeline_file(pipeline_config_path,
                                             config_override=config_override)
    kwargs.update({
        'train_steps':
        train_steps,
        'use_bfloat16':
        configs['train_config'].use_bfloat16 and use_tpu
    })
    configs = merge_external_params_with_configs(configs,
                                                 None,
                                                 kwargs_dict=kwargs)
    model_config = configs['model']
    train_config = configs['train_config']
    train_input_config = configs['train_input_config']

    unpad_groundtruth_tensors = train_config.unpad_groundtruth_tensors
    add_regularization_loss = train_config.add_regularization_loss
    clip_gradients_value = None
    if train_config.gradient_clipping_by_norm > 0:
        clip_gradients_value = train_config.gradient_clipping_by_norm

    # update train_steps from config but only when non-zero value is provided
    if train_steps is None and train_config.num_steps != 0:
        train_steps = train_config.num_steps

    if kwargs['use_bfloat16']:
        tf.compat.v2.keras.mixed_precision.experimental.set_policy(
            'mixed_bfloat16')

    if train_config.load_all_detection_checkpoint_vars:
        raise ValueError('train_pb2.load_all_detection_checkpoint_vars '
                         'unsupported in TF2')

    config_util.update_fine_tune_checkpoint_type(train_config)
    fine_tune_checkpoint_type = train_config.fine_tune_checkpoint_type
    fine_tune_checkpoint_version = train_config.fine_tune_checkpoint_version

    # Write the as-run pipeline config to disk.
    if save_final_config:
        pipeline_config_final = create_pipeline_proto_from_configs(configs)
        config_util.save_pipeline_config(pipeline_config_final, model_dir)

    # Build the model, optimizer, and training input
    strategy = tf.compat.v2.distribute.get_strategy()
    with strategy.scope():
        detection_model = model_builder.build(model_config=model_config,
                                              is_training=True)

        def train_dataset_fn(input_context):
            """Callable to create train input."""
            # Create the inputs.
            train_input = inputs.train_input(
                train_config=train_config,
                train_input_config=train_input_config,
                model_config=model_config,
                model=detection_model,
                input_context=input_context)
            train_input = train_input.repeat()
            return train_input

        train_input = strategy.experimental_distribute_datasets_from_function(
            train_dataset_fn)

        global_step = tf.Variable(
            0,
            trainable=False,
            dtype=tf.compat.v2.dtypes.int64,
            name='global_step',
            aggregation=tf.compat.v2.VariableAggregation.ONLY_FIRST_REPLICA)
        optimizer, (learning_rate, ) = optimizer_builder.build(
            train_config.optimizer, global_step=global_step)

        if callable(learning_rate):
            learning_rate_fn = learning_rate
        else:
            learning_rate_fn = lambda: learning_rate

    ## Train the model
    # Get the appropriate filepath (temporary or not) based on whether the worker
    # is the chief.
    summary_writer_filepath = get_filepath(strategy,
                                           os.path.join(model_dir, 'train'))
    if record_summaries:
        summary_writer = tf.compat.v2.summary.create_file_writer(
            summary_writer_filepath)
    else:
        summary_writer = tf2.summary.create_noop_writer()

    if use_tpu:
        num_steps_per_iteration = 100
    else:
        # TODO(b/135933080) Explore setting to 100 when GPU performance issues
        # are fixed.
        num_steps_per_iteration = 1

    with summary_writer.as_default():
        with strategy.scope():
            with tf.compat.v2.summary.record_if(
                    lambda: global_step % num_steps_per_iteration == 0):
                # Load a fine-tuning checkpoint.
                if train_config.fine_tune_checkpoint:
                    load_fine_tune_checkpoint(
                        detection_model, train_config.fine_tune_checkpoint,
                        fine_tune_checkpoint_type,
                        fine_tune_checkpoint_version, train_input,
                        unpad_groundtruth_tensors)

                ckpt = tf.compat.v2.train.Checkpoint(step=global_step,
                                                     model=detection_model,
                                                     optimizer=optimizer)

                manager_dir = get_filepath(strategy, model_dir)
                if not strategy.extended.should_checkpoint:
                    checkpoint_max_to_keep = 1
                manager = tf.compat.v2.train.CheckpointManager(
                    ckpt, manager_dir, max_to_keep=checkpoint_max_to_keep)

                # We use the following instead of manager.latest_checkpoint because
                # manager_dir does not point to the model directory when we are running
                # in a worker.
                latest_checkpoint = tf.train.latest_checkpoint(model_dir)
                ckpt.restore(latest_checkpoint)

                def train_step_fn(features, labels):
                    """Single train step."""
                    loss = eager_train_step(
                        detection_model,
                        features,
                        labels,
                        unpad_groundtruth_tensors,
                        optimizer,
                        learning_rate=learning_rate_fn(),
                        add_regularization_loss=add_regularization_loss,
                        clip_gradients_value=clip_gradients_value,
                        global_step=global_step,
                        num_replicas=strategy.num_replicas_in_sync)
                    global_step.assign_add(1)
                    return loss

                def _sample_and_train(strategy, train_step_fn, data_iterator):
                    features, labels = data_iterator.next()
                    if hasattr(tf.distribute.Strategy, 'run'):
                        per_replica_losses = strategy.run(train_step_fn,
                                                          args=(features,
                                                                labels))
                    else:
                        per_replica_losses = strategy.experimental_run_v2(
                            train_step_fn, args=(features, labels))
                    # TODO(anjalisridhar): explore if it is safe to remove the
                    # num_replicas scaling of the loss and switch this to a ReduceOp.Mean
                    return strategy.reduce(tf.distribute.ReduceOp.SUM,
                                           per_replica_losses,
                                           axis=None)

                @tf.function
                def _dist_train_step(data_iterator):
                    """A distributed train step."""

                    if num_steps_per_iteration > 1:
                        for _ in tf.range(num_steps_per_iteration - 1):
                            # Following suggestion on yaqs/5402607292645376
                            with tf.name_scope(''):
                                _sample_and_train(strategy, train_step_fn,
                                                  data_iterator)

                    return _sample_and_train(strategy, train_step_fn,
                                             data_iterator)

                train_input_iter = iter(train_input)

                if int(global_step.value()) == 0:
                    manager.save()

                checkpointed_step = int(global_step.value())
                logged_step = global_step.value()

                last_step_time = time.time()
                for _ in range(global_step.value(), train_steps,
                               num_steps_per_iteration):

                    loss = _dist_train_step(train_input_iter)

                    time_taken = time.time() - last_step_time
                    last_step_time = time.time()

                    tf.compat.v2.summary.scalar('steps_per_sec',
                                                num_steps_per_iteration * 1.0 /
                                                time_taken,
                                                step=global_step)

                    if global_step.value() - logged_step >= 100:
                        tf.logging.info(
                            'Step {} per-step time {:.3f}s loss={:.3f}'.format(
                                global_step.value(),
                                time_taken / num_steps_per_iteration, loss))
                        logged_step = global_step.value()

                    if ((int(global_step.value()) - checkpointed_step) >=
                            checkpoint_every_n):
                        manager.save()
                        checkpointed_step = int(global_step.value())

    # Remove the checkpoint directories of the non-chief workers that
    # MultiWorkerMirroredStrategy forces us to save during sync distributed
    # training.
    clean_temporary_directories(strategy, manager_dir)
    clean_temporary_directories(strategy, summary_writer_filepath)
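A minimal sketch of how train_loop is typically invoked; the paths and step counts below are placeholders, not values from the original example.

# Hypothetical invocation (illustrative values only).
train_loop(
    pipeline_config_path='configs/pipeline.config',
    model_dir='/tmp/od_model_dir',
    train_steps=1000,
    use_tpu=False,
    checkpoint_every_n=500)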
Example No. 7
    def build_model(self):
        x = tf.Variable(1.0)
        y = tf.Variable(2.0)
        z = x + y
        return z
Example No. 8
def bias_variable(shape):
  """Create a bias variable with appropriate initialization."""
  initial = tf.constant(0.1, shape=shape)
  return tf.Variable(initial)
x_data = [[73., 80., 75.], [93., 88., 93.], [89., 91., 90.], [96., 98., 100.],
          [73., 66., 70.]]

y_data = [[152.], [185.], [180.], [196.], [142.]]

# placeholders for a tensor that will be always fed.
# shape=[None, 3]: 3 means each sample has three features [x1, x2, x3]; None means any number of samples (N rows)
X = tf.placeholder(tf.float32, shape=[None, 3])
Y = tf.placeholder(tf.float32, shape=[None, 1])

#                 w1
#[x1, x2, x3] * [ w2 ]
#                 w3

#                                    1: W has a single column, and 3 rows
W = tf.Variable(tf.random_normal([3, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis
hypothesis = tf.matmul(X, W) + b

# Simplified cost/loss function
cost = tf.reduce_mean(tf.square(hypothesis - Y))

# Minimize
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

# Launch the graph in a session.
sess = tf.Session()
# Initializes global variables in the graph.
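The example is cut off after the session is created; a minimal sketch of the initialization and training loop that typically follows (an assumption, not the original author's code):

sess.run(tf.global_variables_initializer())

# Fit the linear model over a few thousand gradient descent steps.
for step in range(2001):
    cost_val, hy_val, _ = sess.run([cost, hypothesis, train],
                                   feed_dict={X: x_data, Y: y_data})
    if step % 100 == 0:
        print(step, "Cost:", cost_val, "\nPrediction:\n", hy_val)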
Example No. 10
def detection_loss(cls_outputs, box_outputs, labels, params):
    """Computes total detection loss.

  Computes total detection loss including box and class loss from all levels.
  Args:
    cls_outputs: an OrderedDict with keys representing levels and values
      representing logits in [batch_size, height, width, num_anchors].
    box_outputs: an OrderedDict with keys representing levels and values
      representing box regression targets in [batch_size, height, width,
      num_anchors * 4].
    labels: the dictionary returned from the dataloader that includes
      groundtruth targets.
    params: the dictionary of training parameters specified in the
      default_hparams function in this file.

  Returns:
    total_loss: a float tensor representing the total loss reduced from the
      class and box losses at all levels.
    cls_loss: a float tensor representing the total class loss.
    box_loss: a float tensor representing the total box regression loss.
    box_iou_loss: a float tensor representing the total box IoU loss.
  """
    # Sum all positives in a batch for normalization and avoid zero
    # num_positives_sum, which would lead to inf loss during training
    num_positives_sum = tf.reduce_sum(labels['mean_num_positives']) + 1.0
    positives_momentum = params.get('positives_momentum', None) or 0
    if positives_momentum > 0:
        # normalize the num_positive_examples for training stability.
        moving_normalizer_var = tf.Variable(
            0.0,
            name='moving_normalizer',
            dtype=tf.float32,
            synchronization=tf.VariableSynchronization.ON_READ,
            trainable=False,
            aggregation=tf.VariableAggregation.MEAN)
        num_positives_sum = tf.keras.backend.moving_average_update(
            moving_normalizer_var,
            num_positives_sum,
            momentum=params['positives_momentum'])
    elif positives_momentum < 0:
        num_positives_sum = utils.cross_replica_mean(num_positives_sum)

    levels = cls_outputs.keys()
    cls_losses = []
    box_losses = []
    for level in levels:
        # Onehot encoding for classification labels.
        cls_targets_at_level = tf.one_hot(labels['cls_targets_%d' % level],
                                          params['num_classes'])

        if params['data_format'] == 'channels_first':
            bs, _, width, height, _ = cls_targets_at_level.get_shape().as_list(
            )
            cls_targets_at_level = tf.reshape(cls_targets_at_level,
                                              [bs, -1, width, height])
        else:
            bs, width, height, _, _ = cls_targets_at_level.get_shape().as_list(
            )
            cls_targets_at_level = tf.reshape(cls_targets_at_level,
                                              [bs, width, height, -1])
        box_targets_at_level = labels['box_targets_%d' % level]

        cls_loss = focal_loss(cls_outputs[level],
                              cls_targets_at_level,
                              params['alpha'],
                              params['gamma'],
                              normalizer=num_positives_sum,
                              label_smoothing=params['label_smoothing'])

        if params['data_format'] == 'channels_first':
            cls_loss = tf.reshape(
                cls_loss, [bs, -1, width, height, params['num_classes']])
        else:
            cls_loss = tf.reshape(
                cls_loss, [bs, width, height, -1, params['num_classes']])
        cls_loss *= tf.cast(
            tf.expand_dims(tf.not_equal(labels['cls_targets_%d' % level], -2),
                           -1), tf.float32)
        cls_losses.append(tf.clip_by_value(tf.reduce_sum(cls_loss), 0.0, 2.0))

        if params['box_loss_weight']:
            box_losses.append(
                _box_loss(box_outputs[level],
                          box_targets_at_level,
                          num_positives_sum,
                          delta=params['delta']))

    if params['iou_loss_type']:
        input_anchors = anchors.Anchors(params['min_level'],
                                        params['max_level'],
                                        params['num_scales'],
                                        params['aspect_ratios'],
                                        params['anchor_scale'],
                                        params['image_size'])
        box_output_list = [tf.reshape(box_outputs[i], [-1, 4]) for i in levels]
        box_outputs = tf.concat(box_output_list, axis=0)
        box_target_list = [
            tf.reshape(labels['box_targets_%d' % level], [-1, 4])
            for level in levels
        ]
        box_targets = tf.concat(box_target_list, axis=0)
        anchor_boxes = tf.tile(input_anchors.boxes, [params['batch_size'], 1])
        box_outputs = anchors.decode_box_outputs(box_outputs, anchor_boxes)
        box_targets = anchors.decode_box_outputs(box_targets, anchor_boxes)
        box_iou_loss = _box_iou_loss(box_outputs, box_targets,
                                     num_positives_sum,
                                     params['iou_loss_type'])

    else:
        box_iou_loss = 0

    # Sum per level losses to total loss.
    cls_loss = tf.add_n(cls_losses)
    box_loss = tf.add_n(box_losses) if box_losses else 0

    total_loss = (cls_loss + params['box_loss_weight'] * box_loss +
                  params['iou_loss_weight'] * box_iou_loss)

    return total_loss, cls_loss, box_loss, box_iou_loss
Example No. 11
def weight_variable(shape):                             # initialization of weights and biases
  """Create a weight variable with appropriate initialization."""
  initial = tf.truncated_normal(shape, stddev=0.1)
  return tf.Variable(initial)
Example No. 12
    def __init__(self, dataset, hparams, forward_only=False):
        # dataset parameters
        self.dataset = dataset
        self.vocab_size = self.dataset.vocab_size
        self.review_size = self.dataset.review_size
        self.user_size = self.dataset.user_size
        self.product_size = self.dataset.product_size
        self.query_max_length = self.dataset.query_max_length
        self.vocab_distribute = self.dataset.vocab_distribute
        self.review_distribute = self.dataset.review_distribute
        self.product_distribute = self.dataset.product_distribute

        self.hparams = hparams
        self.negative_sample = self.hparams.negative_sample
        self.embed_size = self.hparams.embed_size
        self.window_size = self.hparams.window_size
        self.max_gradient_norm = self.hparams.max_gradient_norm
        self.init_learning_rate = self.hparams.init_learning_rate
        self.L2_lambda = self.hparams.L2_lambda
        self.net_struct = self.hparams.net_struct
        self.similarity_func = self.hparams.similarity_func
        self.query_weight = self.hparams.query_weight
        self.global_step = tf.Variable(0, trainable=False)
        self.print_ops = []
        if self.query_weight >= 0:
            self.Wu = tf.Variable(self.query_weight,
                                  name="user_weight",
                                  dtype=tf.float32,
                                  trainable=False)
        else:
            self.Wu = tf.sigmoid(
                tf.Variable(0, name="user_weight", dtype=tf.float32))

        # create placeholders
        self._create_placeholder()

        # specify model structure
        logging.info("Model Name " + self.net_struct)
        self.need_review = True
        if 'simplified' in self.net_struct:
            print('Simplified model')
            self.need_review = False

        self.need_context = False
        if 'hdc' in self.net_struct:
            print('Use context words')
            self.need_context = True

        if 'LSE' == self.net_struct:
            self.need_review = False
            self.need_context = True

        if self.need_context:
            self.context_word_idxs = []
            for i in xrange(2 * self.window_size):
                self.context_word_idxs.append(
                    tf.placeholder(tf.int64,
                                   shape=[None],
                                   name="context_idx{0}".format(i)))

        # Training losses.
        self.loss = None
        if 'LSE' == self.net_struct:
            self.loss = LSE.build_embedding_graph_and_loss(self)
        else:
            self.loss = HEM_builder.build_embedding_graph_and_loss(self)

        # Gradients and SGD update operation for training the model.
        params = tf.trainable_variables()
        if not forward_only:
            opt = tf.train.GradientDescentOptimizer(self.learning_rate)
            self.gradients = tf.gradients(self.loss, params)

            self.clipped_gradients, self.norm = tf.clip_by_global_norm(
                self.gradients, self.max_gradient_norm)
            self.updates = opt.apply_gradients(zip(self.clipped_gradients,
                                                   params),
                                               global_step=self.global_step)

            #self.updates = opt.apply_gradients(zip(self.gradients, params),
            #                                 global_step=self.global_step)
        else:
            if 'LSE' == self.net_struct:
                self.product_scores = LSE.get_product_scores(
                    self, self.query_word_idxs)
            else:
                self.product_scores = HEM_builder.get_product_scores(
                    self, self.user_idxs, self.query_word_idxs)

        # Add tf.summary scalar
        tf.summary.scalar('Learning_rate',
                          self.learning_rate,
                          collections=['train'])
        tf.summary.scalar('Loss', self.loss, collections=['train'])
        self.train_summary = tf.summary.merge_all(key='train')

        self.saver = tf.train.Saver(tf.global_variables())
Example No. 13
v = DictVectorizer()
X_train = v.fit_transform(train_data)
X_test = v.transform(test_data)

X_train = X_train[:1000, :]
y_train = y_train[:1000, :]

n, p = X_train.shape
# number of latent factors
k = 5
# design matrix
X = tf.placeholder(tf.float32, shape=[n, p])
# target vector
y = tf.placeholder(tf.float32, shape=[n, 1])
# bias and weights
w0 = tf.Variable(tf.zeros([1]))
W = tf.Variable(tf.zeros([p]))
# interaction factors, randomly initialized
V = tf.Variable(tf.random_normal([k, p], stddev=0.01))  # V
# estimate of y, initialized to 0.
y_hat = tf.Variable(tf.zeros([n, 1]))

linear_terms = tf.add(w0, tf.reduce_sum(tf.multiply(W, X), 1, keepdims=True))
term1 = tf.pow(tf.matmul(X, tf.transpose(V)), 2)
term2 = tf.matmul(tf.pow(X, 2), tf.pow(tf.transpose(V), 2))
interactions = tf.reduce_sum(tf.subtract(term1, term2), 1, keepdims=True)

# L2 regularized sum of squares loss function over W and V
lambda_w = tf.constant(0.001, name='lambda_w')
lambda_v = tf.constant(0.001, name='lambda_v')
l2_norm = tf.add(tf.reduce_sum(tf.multiply(lambda_w, tf.pow(W, 2))),
                 tf.reduce_sum(tf.multiply(lambda_v, tf.pow(V, 2))))
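The snippet is truncated here; a minimal sketch of how the factorization machine prediction, loss, and training loop typically continue (an assumption built on the tensors defined above; dense feeds are assumed for the sparse design matrix):

# Assumed continuation: prediction, regularized loss, optimizer, training loop.
y_hat = tf.add(linear_terms, interactions)
error = tf.reduce_mean(tf.square(tf.subtract(y, y_hat)))
loss = tf.add(error, l2_norm)

train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(100):
        sess.run(train_op, feed_dict={X: X_train.toarray(), y: y_train})
    print('MSE:', sess.run(error, feed_dict={X: X_train.toarray(), y: y_train}))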
Example No. 14
import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()
tf.set_random_seed(777)

x_data = [[1, 2], [2, 3], [3, 1], [4, 3], [5, 2], [6, 2]]
y_data = [[0], [0], [0], [1], [1], [1]]

x = tf.placeholder(dtype=tf.float32,
                   shape=[None, 2])  # None: any leading (batch) dimension is accepted
y = tf.placeholder(dtype=tf.float32, shape=[None, 1])
w = tf.Variable(tf.random_normal([2, 1]))
b = tf.Variable(tf.random_normal([1]))
hypothesis = tf.sigmoid(tf.matmul(x, w) +
                        b)  # x1*w1 + x2*w2 + b -> 1 / (1 + e^-(wx + b))
cost = -tf.reduce_mean(y * tf.log(hypothesis) +
                       (1 - y) * tf.log(1 - hypothesis))  # cross entropy
update = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)
prediction = tf.cast(hypothesis > 0.5, dtype=tf.float32)  # 1 if greater than 0.5, 0 otherwise
accuracy = tf.reduce_mean(tf.cast(tf.equal(prediction, y),
                                  dtype=tf.float32))  # cast to float: 1 if equal, 0 otherwise

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(10000):
        _cost, _ = sess.run([cost, update], feed_dict={x: x_data, y: y_data})
        if epoch % 200 == 0:
            print('epoch:{} cost:{}'.format(epoch, _cost))
    _h, _p, _a = sess.run([hypothesis, prediction, accuracy],
                          feed_dict={x: x_data, y: y_data})
Example No. 15
    # Network hyper-parameter settings
    img_size = 256
    regulation = 0.01
    learning_rate = 0.001
    batch_size = 10
    dropout_rate = 0.1

    # Declare placeholders
    input_data = tf.placeholder(tf.float32, [None, img_size, img_size, 3],
                                name='input_data')
    input_label = tf.placeholder(tf.float32, [None, img_size, img_size, 3],
                                 name='input_label')

    # Trainable parameter settings
    W1 = tf.Variable(tf.random_normal([3, 3, 3, 128],
                                      dtype=tf.float32,
                                      stddev=0.01),
                     name='W_encoder1')
    b1 = tf.Variable(tf.zeros([128], dtype=tf.float32), name='b_encoder1')
    W2 = tf.Variable(tf.random_normal([3, 3, 128, 64],
                                      dtype=tf.float32,
                                      stddev=0.01),
                     name='W_encoder2')
    b2 = tf.Variable(tf.zeros([64], dtype=tf.float32), name='b_encoder2')
    W3 = tf.Variable(tf.random_normal([3, 3, 64, 32],
                                      dtype=tf.float32,
                                      stddev=0.03),
                     name='W_encoder3')
    b3 = tf.Variable(tf.zeros([32], dtype=tf.float32), name='b_encoder3')
    W4 = tf.Variable(tf.random_normal([3, 3, 32, 32],
                                      dtype=tf.float32,
                                      stddev=0.05),
Example No. 16
from rlcard.utils.logger import plot
# Make environment
env = rlcard.make('no-limit-holdem')
eval_env = rlcard.make('no-limit-holdem')
# Set a global seed
set_global_seed(0)

### Step 2: Initialize the NFSP agents. ###
import tensorflow.compat.v1 as tf
from rlcard.agents.nfsp_agent import NFSPAgent
tf.disable_v2_behavior()
memory_init_size = 1000
norm_step = 100
with tf.Session() as sess:
    # Set agents
    global_step = tf.Variable(0, name='global_step', trainable=False)
    agents = []
    for i in range(env.player_num):
        agent = NFSPAgent(sess,
                          scope='nfsp' + str(i),
                          action_num=env.action_num,
                          state_shape=env.state_shape,
                          hidden_layers_sizes=[128, 128],
                          min_buffer_size_to_learn=1000,
                          q_replay_memory_init_size=memory_init_size,
                          q_update_target_estimator_every=norm_step,
                          q_mlp_layers=[128, 128])
        agents.append(agent)
    # with sess.as_default():  #uncomment when loading
    #     saver = tf.train.Saver()
    #     saver.restore(sess, tf.train.latest_checkpoint(save_dir))
Example No. 17
'''
SFFF (S = start point, safe)
FHFH (F = Frozen surface, safe)
FFFH (H = hole)
HFFG (G = goal, target)
'''

import gym
import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()

env = gym.make('FrozenLake-v0')

tf.reset_default_graph()

#These lines establish the feed-forward part of the network used to choose actions
inputs1 = tf.placeholder(shape=[1, 16], dtype=tf.float32)
W = tf.Variable(tf.random_uniform([16, 4], 0, 0.01))
Qout = tf.matmul(inputs1, W)
predict = tf.argmax(Qout, 1)

#Below we obtain the loss by taking the sum of squares difference between the target and prediction Q values.
nextQ = tf.placeholder(shape=[1, 4], dtype=tf.float32)
loss = tf.reduce_sum(tf.square(nextQ - Qout))
trainer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
updateModel = trainer.minimize(loss)

init = tf.initialize_all_variables()

# Set learning parameters
y = .99
e = 0.1
num_episodes = 2000
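The snippet stops after the hyper-parameters; a minimal sketch of the epsilon-greedy training loop this setup usually feeds (an assumption following the standard FrozenLake Q-network tutorial, using the discount y and exploration rate e defined above):

# Assumed continuation of the FrozenLake Q-network example.
with tf.Session() as sess:
    sess.run(init)
    for episode in range(num_episodes):
        s = env.reset()
        done = False
        while not done:
            # Choose an action greedily, with probability e of a random action.
            a, allQ = sess.run([predict, Qout],
                               feed_dict={inputs1: np.identity(16)[s:s + 1]})
            if np.random.rand(1) < e:
                a[0] = env.action_space.sample()
            s1, reward, done, _ = env.step(a[0])
            # Q values of the new state form the bootstrapped target.
            Q1 = sess.run(Qout, feed_dict={inputs1: np.identity(16)[s1:s1 + 1]})
            targetQ = allQ
            targetQ[0, a[0]] = reward + y * np.max(Q1)
            # Train the network towards the target Q values.
            sess.run(updateModel,
                     feed_dict={inputs1: np.identity(16)[s:s + 1], nextQ: targetQ})
            s = s1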
def backword(mnist):
    # Placeholders for the training data x and the labels y_
    x = tf.placeholder(tf.float32, [None, mnist_forward.INPUT_NODE])
    y_ = tf.placeholder(tf.float32, [None, mnist_forward.OUTPUT_NODE])
    # Run the forward pass (with optional regularization) to compute the prediction y
    y = mnist_forward.forward(x, REGULARIZER)
    # Step counter, not trainable
    global_step = tf.Variable(0, trainable=False)

    # Define the cross-entropy loss.
    # Cross entropy is usually used together with softmax regression, so the
    # tf.nn.sparse_softmax_cross_entropy_with_logits function wraps the two
    # operations and speeds up the computation.
    # The first argument is the forward-pass result without the softmax layer;
    # the second argument is the correct answer for the training data, given
    # here as the index of the correct class.
    ce = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y,
                                                        labels=tf.argmax(
                                                            y_, 1))
    # Average the cross entropy over all examples in the current batch
    cem = tf.reduce_mean(ce)
    # Total loss = cross-entropy loss + regularization loss
    loss = cem + tf.add_n(tf.get_collection('losses'))

    # Exponentially decaying learning rate
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE,
                                               global_step,
                                               mnist.train.num_examples /
                                               BATCH_SIZE,
                                               LEARNING_RATE_DECAY,
                                               staircase=True)

    # Define the backpropagation (training) step
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)

    # Moving average: keep a shadow value for every model parameter (w and b)
    # that tracks the parameter as it changes over time.
    # MOVING_AVERAGE_DECAY: the moving-average decay rate
    ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    ema_op = ema.apply(tf.trainable_variables())
    # Every pass over the data must both update the network parameters via
    # backpropagation and update the moving average of every parameter;
    # tf.control_dependencies below groups the two update ops.
    with tf.control_dependencies([train_step, ema_op]):
        train_op = tf.no_op(name='train')

    saver = tf.train.Saver()

    with tf.Session() as sess:
        # Initialize variables
        tf.global_variables_initializer().run()

        ckpt = tf.train.get_checkpoint_state(MODEL_SAVE_PATH)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

        for i in range(STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            _, loss_value, step = sess.run([train_op, loss, global_step],
                                           feed_dict={
                                               x: xs,
                                               y_: ys
                                           })
            if i % 1000 == 0:
                print("After %d training steps,loss on training batch is %g." %
                      (step, loss_value))
                saver.save(sess,
                           os.path.join(MODEL_SAVE_PATH, MODEL_NAME),
                           global_step=global_step)
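A minimal sketch of the driver that would typically call this function (an assumption; mnist_forward, REGULARIZER, and the other module-level constants referenced above must already be defined, and the data directory is illustrative):

from tensorflow.examples.tutorials.mnist import input_data

def main():
    # Hypothetical entry point (not part of the original example).
    mnist = input_data.read_data_sets('./data/', one_hot=True)
    backword(mnist)

if __name__ == '__main__':
    main()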
Example No. 19
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MINIST_data/", one_hot=True)
import pylab
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

tf.reset_default_graph()
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])

W = tf.Variable(tf.random_normal([784, 10]))
b = tf.Variable(tf.zeros([10]))

pred = tf.nn.softmax(tf.matmul(x, W) + b)

cost = tf.reduce_mean(-tf.reduce_sum(y * tf.log(pred), reduction_indices=1))

learning_rate = 0.01

optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

training_epochs = 25
batch_size = 100
display_step = 1

saver = tf.train.Saver(max_to_keep=1)
savedir = "model/"
fileprefix = "handwriting.ckpt"

with tf.Session() as sess:
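    # Assumed continuation (the original snippet ends at the line above):
    # the standard mini-batch training loop, saving a checkpoint each epoch.
    sess.run(tf.global_variables_initializer())
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples / batch_size)
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            _, c = sess.run([optimizer, cost],
                            feed_dict={x: batch_xs, y: batch_ys})
            avg_cost += c / total_batch
        if (epoch + 1) % display_step == 0:
            print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))
        saver.save(sess, savedir + fileprefix, global_step=epoch)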
Example No. 20
train_labels = labels[:, :1000]
test_labels = labels[:, 1000:]

# Define the activation function
active_func = tf.nn.sigmoid
# active_func = tf.nn.relu

# Input is 14 x n, one sample per column; output is 1 x n, one prediction per column
x = tf.placeholder(dtype=tf.float64, shape=(f_num, None), name='x')
y = tf.placeholder(dtype=tf.float64, shape=(1, None), name='y')

# First layer: 8 neurons, weight matrix 8 x 14
l1_node = 8
layer1W = tf.Variable(tf.random_normal([l1_node, f_num],
                                       stddev=1,
                                       dtype=tf.float64),
                      name='layer1Weights',
                      dtype=tf.float64)
layer1B = tf.Variable(tf.random_normal([l1_node, 1],
                                       stddev=1,
                                       dtype=tf.float64),
                      name='layer1Bias',
                      dtype=tf.float64)
l1Output = tf.matmul(layer1W, x) + layer1B
l1Output = active_func(l1Output)

# Second layer: 6 neurons, weight matrix 6 x 8
l2_node = 6
layer2W = tf.Variable(tf.random_normal([l2_node, l1_node],
                                       stddev=1,
                                       dtype=tf.float64),
Example No. 21
batch_size = 7  #how many windows of data we pass at once
window_size = 7  #size of each window (how many days we consider to predict the next point in the sequence)
hidden_layer = 256  #how many units we use in the LSTM cell
clip_margin = 4  #to prevent exploding gradients, we clip gradients below -margin or above +margin
learning_rate = 0.001  #learning rate is a parameter of the gradient descent algorithm that controls
#how much the weights of the network change in response to the gradient of the loss.
epochs = 200  #one forward pass and one backward pass over all the training examples; this is the number of iterations (forward and back propagation) our model makes.

#Placeholders allows us to send different data within our network with the tf.placeholder() command.
inputs = tf.placeholder(tf.float32, [batch_size, window_size, 1])
targets = tf.placeholder(tf.float32, [batch_size, 1])
print("input shape:", inputs.shape)
print("target shape:", targets.shape)
#Output layer weights
weights_output = tf.Variable(
    tf.truncated_normal([hidden_layer, 1], stddev=0.05))
bias_output_layer = tf.Variable(tf.zeros([1]))

#perform forward propagation to predict the output.
#  A list is initialized to store the predicted output
outputs = []
#for each iteration output is computed and stored in the outputs list
for i in range(
        batch_size
):  # Iterates through every window in the batch. The Batch Size refers to the number of training samples propagated through the network
    # for each batch creating batch_state as all zeros and output for that window which is all zeros at the beginning as well.
    #initialize hidden state and cell state. np.zeros() Return a new array of given shape and type, filled with zeros.
    cell_state = np.zeros([1, hidden_layer], dtype=np.float32)
    hidden_state = np.zeros([1, hidden_layer], dtype=np.float32)
    #print("hidden state:", hidden_state)
Example No. 22
    def __init__(self, lr_rate=0.001, regular=0.005, trainable=False):
        self.parameter = []
        with tf.name_scope('input_layer'):
            self.input_x = tf.placeholder(dtype=tf.float32,
                                          shape=[None, 227, 227, 3],
                                          name='input_x')
            self.input_y = tf.placeholder(dtype=tf.float32,
                                          shape=[None, 1000],
                                          name='input_y')

        with tf.name_scope('first_conv_layer_part1'):
            kernel1_1 = tf.Variable(
                tf.truncated_normal(shape=[11, 11, 3, 48],
                                    stddev=0.01,
                                    dtype=tf.float32),
                name='kernel_weight',
                collections=[tf.GraphKeys.GLOBAL_VARIABLES, 'loss'])
            bias1_1 = tf.Variable(tf.constant(value=0,
                                              shape=[48],
                                              dtype=tf.float32),
                                  name='kernel_bias')
            conv1_1 = tf.nn.relu(
                tf.nn.bias_add(
                    tf.nn.conv2d(self.input_x,
                                 kernel1_1,
                                 strides=[1, 4, 4, 1],
                                 padding='VALID'), bias1_1))
            lrn1_1 = tf.nn.local_response_normalization(conv1_1,
                                                        depth_radius=2,
                                                        bias=1,
                                                        alpha=2e-05,
                                                        beta=0.75)
        self.parameter.append([kernel1_1, bias1_1])

        with tf.name_scope('first_conv_layer_part2'):
            kernel1_2 = tf.Variable(
                tf.truncated_normal(shape=[11, 11, 3, 48],
                                    stddev=0.01,
                                    dtype=tf.float32),
                name='kernel_weight',
                collections=[tf.GraphKeys.GLOBAL_VARIABLES, 'loss'])
            bias1_2 = tf.Variable(tf.constant(value=0,
                                              shape=[48],
                                              dtype=tf.float32),
                                  name='kernel_bias')
            conv1_2 = tf.nn.relu(
                tf.nn.bias_add(
                    tf.nn.conv2d(self.input_x,
                                 kernel1_2,
                                 strides=[1, 4, 4, 1],
                                 padding='VALID'), bias1_2))
            lrn1_2 = tf.nn.local_response_normalization(conv1_2,
                                                        depth_radius=2,
                                                        bias=1,
                                                        alpha=2e-05,
                                                        beta=0.75)
        self.parameter.append([kernel1_2, bias1_2])

        with tf.name_scope('first_maxpool_layer_part1'):
            maxpool1_1 = tf.nn.max_pool(lrn1_1,
                                        ksize=[1, 3, 3, 1],
                                        strides=[1, 2, 2, 1],
                                        padding='VALID')

        with tf.name_scope('first_maxpool_layer_part2'):
            maxpool1_2 = tf.nn.max_pool(lrn1_2,
                                        ksize=[1, 3, 3, 1],
                                        strides=[1, 2, 2, 1],
                                        padding='VALID')

        with tf.name_scope('second_conv_layer_part1'):
            kernel2_1 = tf.Variable(
                tf.truncated_normal(shape=[5, 5, 48, 128],
                                    stddev=0.01,
                                    dtype=tf.float32),
                name='kernel_weight',
                collections=[tf.GraphKeys.GLOBAL_VARIABLES, 'loss'])
            bias2_1 = tf.Variable(tf.constant(value=1,
                                              shape=[128],
                                              dtype=tf.float32),
                                  name='kernel_bias')
            conv2_1 = tf.nn.relu(
                tf.nn.bias_add(
                    tf.nn.conv2d(maxpool1_1,
                                 kernel2_1,
                                 strides=[1, 1, 1, 1],
                                 padding='SAME'), bias2_1))
            lrn2_1 = tf.nn.local_response_normalization(conv2_1,
                                                        depth_radius=2,
                                                        bias=1,
                                                        alpha=2e-05,
                                                        beta=0.75)
        self.parameter.append([kernel2_1, bias2_1])

        with tf.name_scope('second_conv_layer_part2'):
            kernel2_2 = tf.Variable(
                tf.truncated_normal(shape=[5, 5, 48, 128],
                                    stddev=0.01,
                                    dtype=tf.float32),
                name='kernel_weight',
                collections=[tf.GraphKeys.GLOBAL_VARIABLES, 'loss'])
            bias2_2 = tf.Variable(tf.constant(value=0,
                                              shape=[128],
                                              dtype=tf.float32),
                                  name='kernel_bias')
            conv2_2 = tf.nn.relu(
                tf.nn.bias_add(
                    tf.nn.conv2d(maxpool1_2,
                                 kernel2_2,
                                 strides=[1, 1, 1, 1],
                                 padding='SAME'), bias2_2))
            lrn2_2 = tf.nn.local_response_normalization(conv2_2,
                                                        depth_radius=2,
                                                        bias=1,
                                                        alpha=2e-05,
                                                        beta=0.75)
        self.parameter.append([kernel2_2, bias2_2])

        with tf.name_scope('second_maxpool_layer_part1'):
            maxpool2_1 = tf.nn.max_pool(lrn2_1,
                                        ksize=[1, 3, 3, 1],
                                        strides=[1, 2, 2, 1],
                                        padding='VALID')

        with tf.name_scope('second_maxpool_layer_part2'):
            maxpool2_2 = tf.nn.max_pool(lrn2_2,
                                        ksize=[1, 3, 3, 1],
                                        strides=[1, 2, 2, 1],
                                        padding='VALID')
        '''In the paper conv3 has four convolution kernels, so there are four here.
           Many AlexNet implementations use a single kernel of shape=[3, 3, 256, 384];
           the two are equivalent because 128 x 2 = 256 and 192 x 2 = 384.'''
        with tf.name_scope('third_conv_layer_part1'):
            kernel3_1 = tf.Variable(
                tf.truncated_normal(shape=[3, 3, 128, 192],
                                    stddev=0.01,
                                    dtype=tf.float32),
                name='kernel_weight1',
                collections=[tf.GraphKeys.GLOBAL_VARIABLES, 'loss'])
            conv3_1 = tf.nn.conv2d(maxpool2_1,
                                   kernel3_1,
                                   strides=[1, 1, 1, 1],
                                   padding='SAME')
            kernel3_2 = tf.Variable(
                tf.truncated_normal(shape=[3, 3, 128, 192],
                                    stddev=0.01,
                                    dtype=tf.float32),
                name="kernel_weight2",
                collections=[tf.GraphKeys.GLOBAL_VARIABLES, 'loss'])
            conv3_2 = tf.nn.conv2d(maxpool2_1,
                                   kernel3_2,
                                   strides=[1, 1, 1, 1],
                                   padding='SAME')
        self.parameter.append([kernel3_1, kernel3_2])

        with tf.name_scope('third_conv_layer_part2'):
            kernel3_3 = tf.Variable(
                tf.truncated_normal(shape=[3, 3, 128, 192],
                                    stddev=0.01,
                                    dtype=tf.float32),
                name='kernel_weight1',
                collections=[tf.GraphKeys.GLOBAL_VARIABLES, 'loss'])
            conv3_3 = tf.nn.conv2d(maxpool2_2,
                                   kernel3_3,
                                   strides=[1, 1, 1, 1],
                                   padding='SAME')
            kernel3_4 = tf.Variable(
                tf.truncated_normal(shape=[3, 3, 128, 192],
                                    stddev=0.01,
                                    dtype=tf.float32),
                name='kernel_weight2',
                collections=[tf.GraphKeys.GLOBAL_VARIABLES, 'loss'])
            conv3_4 = tf.nn.conv2d(maxpool2_2,
                                   kernel3_4,
                                   strides=[1, 1, 1, 1],
                                   padding='SAME')
        self.parameter.append([kernel3_3, kernel3_4])

        with tf.name_scope('make_two_as_one'):
            bias3_1 = tf.Variable(tf.constant(value=1,
                                              shape=[192],
                                              dtype=tf.float32),
                                  name='bias3_1')
            bias3_2 = tf.Variable(tf.constant(value=1,
                                              shape=[192],
                                              dtype=tf.float32),
                                  name='bias3_2')
            conv3_out1 = tf.nn.bias_add(tf.nn.relu(conv3_1 + conv3_3), bias3_1)
            conv3_out2 = tf.nn.bias_add(tf.nn.relu(conv3_2 + conv3_4), bias3_2)
        self.parameter.append([bias3_1, bias3_2])

        with tf.name_scope('fourth_conv_layer_part1'):
            kernel4_1 = tf.Variable(
                tf.truncated_normal(shape=[3, 3, 192, 192],
                                    stddev=0.01,
                                    dtype=tf.float32),
                name='kernel_weight',
                collections=[tf.GraphKeys.GLOBAL_VARIABLES, 'loss'])
            bias4_1 = tf.Variable(tf.constant(value=1,
                                              shape=[192],
                                              dtype=tf.float32),
                                  name='kernel_bias')
            conv4_1 = tf.nn.relu(
                tf.nn.bias_add(
                    tf.nn.conv2d(conv3_out1,
                                 kernel4_1,
                                 strides=[1, 1, 1, 1],
                                 padding='SAME'), bias4_1))
        self.parameter.append([kernel4_1, bias4_1])

        with tf.name_scope('fourth_conv_layer_part2'):
            kernel4_2 = tf.Variable(
                tf.truncated_normal(shape=[3, 3, 192, 192],
                                    stddev=0.01,
                                    dtype=tf.float32),
                name='kernel_weight',
                collections=[tf.GraphKeys.GLOBAL_VARIABLES, 'loss'])
            bias4_2 = tf.Variable(tf.constant(value=1,
                                              shape=[192],
                                              dtype=tf.float32),
                                  name='kernel_bias')
            conv4_2 = tf.nn.relu(
                tf.nn.bias_add(
                    tf.nn.conv2d(conv3_out2,
                                 kernel4_2,
                                 strides=[1, 1, 1, 1],
                                 padding='SAME'), bias4_2))
        self.parameter.append([kernel4_2, bias4_2])

        with tf.name_scope('fifth_conv_layer_part1'):
            kernel5_1 = tf.Variable(
                tf.truncated_normal(shape=[3, 3, 192, 128],
                                    stddev=0.01,
                                    dtype=tf.float32),
                name='kernel_weight',
                collections=[tf.GraphKeys.GLOBAL_VARIABLES, 'loss'])
            bias5_1 = tf.Variable(tf.constant(value=1,
                                              shape=[128],
                                              dtype=tf.float32),
                                  name='kernel_bias')
            conv5_1 = tf.nn.relu(
                tf.nn.bias_add(
                    tf.nn.conv2d(conv4_1,
                                 kernel5_1,
                                 strides=[1, 1, 1, 1],
                                 padding='SAME'), bias5_1))
        self.parameter.append([kernel5_1, bias5_1])

        with tf.name_scope('fifth_conv_layer_part2'):
            kernel5_2 = tf.Variable(
                tf.truncated_normal(shape=[3, 3, 192, 128],
                                    stddev=0.01,
                                    dtype=tf.float32),
                name='kernel_weight',
                collections=[tf.GraphKeys.GLOBAL_VARIABLES, 'loss'])
            bias5_2 = tf.Variable(tf.constant(value=1,
                                              shape=[128],
                                              dtype=tf.float32),
                                  name='kernel_bias')
            conv5_2 = tf.nn.relu(
                tf.nn.bias_add(
                    tf.nn.conv2d(conv4_2,
                                 kernel5_2,
                                 strides=[1, 1, 1, 1],
                                 padding='SAME'), bias5_2))
        self.parameter.append([kernel5_2, bias5_2])
        with tf.name_scope('fifth_maxpool_layer_part1'):
            maxpool5_1 = tf.nn.max_pool(conv5_1,
                                        ksize=[1, 3, 3, 1],
                                        strides=[1, 2, 2, 1],
                                        padding='VALID')

        with tf.name_scope('fifth_maxpool_layer_part2'):
            maxpool5_2 = tf.nn.max_pool(conv5_2,
                                        ksize=[1, 3, 3, 1],
                                        strides=[1, 2, 2, 1],
                                        padding='VALID')

        conv_out = tf.concat([maxpool5_1, maxpool5_2], 3)
        dim_list = conv_out.get_shape().as_list()[1:]
        shape_dim = np.prod(dim_list)
        reshaped = tf.reshape(conv_out, [-1, shape_dim])

        with tf.name_scope('first_fc_layer'):
            weight1 = tf.Variable(
                tf.truncated_normal(shape=[shape_dim, 4096],
                                    stddev=0.01,
                                    dtype=tf.float32),
                name='fc_weight',
                collections=[tf.GraphKeys.GLOBAL_VARIABLES, 'loss'])
            bias6 = tf.Variable(tf.constant(shape=[4096],
                                            value=1,
                                            dtype=tf.float32),
                                name='fc_bias')
            fc1_out = tf.nn.relu(
                tf.nn.bias_add(tf.matmul(reshaped, weight1), bias6))
            if trainable:
                fc1_out = tf.nn.dropout(fc1_out, rate=0.5)
        self.parameter.append([weight1, bias6])

        with tf.name_scope('second_fc_layer'):
            weight2 = tf.Variable(
                tf.truncated_normal(shape=[4096, 4096],
                                    stddev=0.01,
                                    dtype=tf.float32),
                name='fc_weight',
                collections=[tf.GraphKeys.GLOBAL_VARIABLES, 'loss'])
            bias7 = tf.Variable(tf.constant(shape=[4096],
                                            value=1,
                                            dtype=tf.float32),
                                name='fc_bias')
            fc2_out = tf.nn.relu(
                tf.nn.bias_add(tf.matmul(fc1_out, weight2), bias7))
            if trainable:
                fc2_out = tf.nn.dropout(fc2_out, rate=0.5)
        self.parameter.append([weight2, bias7])

        with tf.name_scope('third_fc_layer'):
            weight3 = tf.Variable(
                tf.truncated_normal(shape=[4096, 1000], stddev=0.01),
                dtype=tf.float32,
                name='fc_weight',
                collections=[tf.GraphKeys.GLOBAL_VARIABLES, 'loss'])
            bias8 = tf.Variable(tf.constant(shape=[1000],
                                            value=1,
                                            dtype=tf.float32),
                                name='fc_bias')
            # Keep the pre-softmax activations as logits; the softmax is only
            # applied to obtain the predicted probabilities.
            self.logits = tf.nn.bias_add(tf.matmul(fc2_out, weight3), bias8)
            self.out = tf.nn.softmax(self.logits)
        self.parameter.append([weight3, bias8])
        with tf.name_scope('loss'):
            regulation_loss = 0
            for i in tf.get_collection('loss'):
                tensor = tf.get_default_graph().get_tensor_by_name(i.name)
                regulation_loss += tf.nn.l2_loss(tensor)
            self.loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(
                    labels=self.input_y, logits=self.logits)) \
                + regulation_loss * regular
            self.train_loss_op = tf.train.GradientDescentOptimizer(
                lr_rate).minimize(self.loss)

        with tf.name_scope('accuracy'):
            self.accuracy = tf.reduce_mean(
                tf.cast(
                    tf.equal(tf.argmax(self.out, axis=1),
                             tf.argmax(self.input_y, axis=1)), tf.float32))
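
# Minimal sketch of the weight-decay pattern used in the class above: weights
# are registered in a custom 'loss' collection when they are created, and the
# loss scope later sums tf.nn.l2_loss over that collection. The variable names
# below (toy_w, toy_b) are illustrative only.
import tensorflow as tf

toy_w = tf.Variable(tf.truncated_normal([3, 3], stddev=0.01),
                    name='toy_w',
                    collections=[tf.GraphKeys.GLOBAL_VARIABLES, 'loss'])
toy_b = tf.Variable(tf.zeros([3]), name='toy_b')  # biases stay outside the penalty

l2_penalty = tf.add_n(
    [tf.nn.l2_loss(v) for v in tf.get_collection('loss')])  # every 'loss'-collection variable
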
# init W_skipconn2even  channel input
k = 0
for j in range(0, code_PCM.shape[1], 1):
    for i in range(0, code_PCM.shape[0], 1):
        if (code_PCM[i, j] == 1):
            W_skipconn2even[j, k] = 1.0
            k += 1
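
# Toy illustration of the loop above: for
#   code_PCM = [[1, 0, 1],
#               [1, 1, 0]]
# there are 4 ones (edges), visited column by column, so W_skipconn2even has
# shape [3, 4] and ends up with
#   W_skipconn2even[0, 0] = W_skipconn2even[0, 1] = 1.0  # variable node 0 -> edges 0, 1
#   W_skipconn2even[1, 2] = 1.0                          # variable node 1 -> edge 2
#   W_skipconn2even[2, 3] = 1.0                          # variable node 2 -> edge 3
# Multiplying the channel LLRs by this matrix (as in
# tf.matmul(xa_input, W_skipconn2even) below) copies each variable node's LLR
# onto every edge it participates in.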

##############################  build four neural networks (Z = 16, 3, 10, 6)  ############################
net_dict = {}
# init the learnable network parameters
Weights_Var = np.ones(sum_edge, dtype=np.float32)
Biases_Var = np.zeros(sum_edge, dtype=np.float32)
for i in range(0, iters_max, 1):
    net_dict["Weights_Var{0}".format(i)] = tf.Variable(
        Weights_Var.copy(), name="Weights_Var".format(i))
    net_dict["Biases_Var{0}".format(i)] = tf.Variable(
        Biases_Var.copy(), name="Biases_Var".format(i))

# the decoding neural network of Z=16
Z = 16
xa = tf.placeholder(tf.float32, shape=[batch_size, N, Z], name='xa')
ya = tf.placeholder(tf.float32, shape=[batch_size, N * Z], name='ya')
xa_input = tf.transpose(xa, [0, 2, 1])
net_dict["LLRa{0}".format(0)] = tf.zeros((batch_size, Z, sum_edge),
                                         dtype=tf.float32)
for i in range(0, iters_max, 1):
    #variable node update
    x0 = tf.matmul(xa_input, W_skipconn2even)
    x1 = tf.matmul(net_dict["LLRa{0}".format(i)], W_odd2even)
    x2 = tf.add(x0, x1)
Exemplo n.º 24
0
    def __init__(self, lr_rate=0.001, regular=0.005):
        self.parameter = []
        with tf.name_scope('input_layer'):
            self.input_x = tf.placeholder(dtype=tf.float32,
                                          shape=[None, 224, 224, 3],
                                          name='input_x')
            self.input_y = tf.placeholder(dtype=tf.float32,
                                          shape=[None, 1000],
                                          name='input_y')

        with tf.name_scope('first_conv_layer'):
            kernel = tf.Variable(
                tf.truncated_normal(shape=[11, 11, 3, 96],
                                    stddev=0.01,
                                    dtype=tf.float32),
                name='kernel_weight',
                collections=[tf.GraphKeys.GLOBAL_VARIABLES, 'loss'])
            bias = tf.Variable(tf.constant(value=1,
                                           shape=[96],
                                           dtype=tf.float32),
                               name='kernel_bias')
            conv1 = tf.nn.relu(
                tf.nn.bias_add(
                    tf.nn.conv2d(self.input_x,
                                 kernel,
                                 strides=[1, 4, 4, 1],
                                 padding='VALID'), bias))
        self.parameter.append([kernel, bias])
        with tf.name_scope('first_maxpool_layer'):
            maxpool1 = tf.nn.max_pool(conv1,
                                      ksize=[1, 3, 3, 1],
                                      strides=[1, 2, 2, 1],
                                      padding='VALID')

        with tf.name_scope('second_conv_layer'):
            kernel2 = tf.Variable(
                tf.truncated_normal(shape=[5, 5, 96, 256],
                                    stddev=0.01,
                                    dtype=tf.float32),
                name='kernel_weight',
                collections=[tf.GraphKeys.GLOBAL_VARIABLES, 'loss'])
            bias2 = tf.Variable(tf.constant(value=1,
                                            shape=[256],
                                            dtype=tf.float32),
                                name='kernel_bias')
            conv2 = tf.nn.relu(
                tf.nn.bias_add(
                    tf.nn.conv2d(maxpool1,
                                 kernel2,
                                 strides=[1, 1, 1, 1],
                                 padding='SAME'), bias2))
        self.parameter.append([kernel2, bias2])
        with tf.name_scope('second_maxpool_layer'):
            maxpool2 = tf.nn.max_pool(conv2,
                                      ksize=[1, 3, 3, 1],
                                      strides=[1, 2, 2, 1],
                                      padding='VALID')

        with tf.name_scope('third_conv_layer'):
            kernel3 = tf.Variable(
                tf.truncated_normal(shape=[3, 3, 256, 384],
                                    stddev=0.01,
                                    dtype=tf.float32),
                name='kernel_weight',
                collections=[tf.GraphKeys.GLOBAL_VARIABLES, 'loss'])
            bias3 = tf.Variable(tf.constant(value=1,
                                            shape=[384],
                                            dtype=tf.float32),
                                name='kernel_bias')
            conv3 = tf.nn.relu(
                tf.nn.bias_add(
                    tf.nn.conv2d(maxpool2,
                                 kernel3,
                                 strides=[1, 1, 1, 1],
                                 padding='SAME'), bias3))
        self.parameter.append([kernel3, bias3])
        with tf.name_scope('fourth_conv_layer'):
            kernel4 = tf.Variable(
                tf.truncated_normal(shape=[3, 3, 384, 384],
                                    stddev=0.01,
                                    dtype=tf.float32),
                name='kernel_weight',
                collections=[tf.GraphKeys.GLOBAL_VARIABLES, 'loss'])
            bias4 = tf.Variable(tf.constant(value=1,
                                            shape=[384],
                                            dtype=tf.float32),
                                name='kernel_bias')
            conv4 = tf.nn.relu(
                tf.nn.bias_add(
                    tf.nn.conv2d(conv3,
                                 kernel4,
                                 strides=[1, 1, 1, 1],
                                 padding='SAME'), bias4))
        self.parameter.append([kernel4, bias4])
        with tf.name_scope('fifth_conv_layer'):
            kernel5 = tf.Variable(
                tf.truncated_normal(shape=[3, 3, 384, 256],
                                    stddev=0.01,
                                    dtype=tf.float32),
                name='kernel_weight',
                collections=[tf.GraphKeys.GLOBAL_VARIABLES, 'loss'])
            bias5 = tf.Variable(tf.constant(value=1,
                                            shape=[256],
                                            dtype=tf.float32),
                                name='kernel_bias')
            conv5 = tf.nn.relu(
                tf.nn.bias_add(
                    tf.nn.conv2d(conv4,
                                 kernel5,
                                 strides=[1, 1, 1, 1],
                                 padding='SAME'), bias5))
        self.parameter.append([kernel5, bias5])
        dim_list = conv5.get_shape().as_list()[1:]
        shape_dim = np.prod(dim_list)
        reshaped = tf.reshape(conv5, [-1, shape_dim])

        with tf.name_scope('first_fc_layer'):
            weight1 = tf.Variable(
                tf.truncated_normal(shape=[shape_dim, 4096],
                                    stddev=0.01,
                                    dtype=tf.float32),
                name='fc_weight',
                collections=[tf.GraphKeys.GLOBAL_VARIABLES, 'loss'])
            bias6 = tf.Variable(tf.constant(shape=[4096],
                                            value=1,
                                            dtype=tf.float32),
                                name='fc_bias')
            fc1_out = tf.nn.relu(
                tf.nn.bias_add(tf.matmul(reshaped, weight1), bias6))
            drop1 = tf.nn.dropout(fc1_out, rate=0.5)
        self.parameter.append([weight1, bias6])

        with tf.name_scope('second_fc_layer'):
            weight2 = tf.Variable(
                tf.truncated_normal(shape=[4096, 4096],
                                    stddev=0.01,
                                    dtype=tf.float32),
                name='fc_weight',
                collections=[tf.GraphKeys.GLOBAL_VARIABLES, 'loss'])
            bias7 = tf.Variable(tf.constant(shape=[4096],
                                            value=1,
                                            dtype=tf.float32),
                                name='fc_bias')
            fc2_out = tf.nn.relu(
                tf.nn.bias_add(tf.matmul(drop1, weight2), bias7))
            drop2 = tf.nn.dropout(fc2_out, rate=0.5)
        self.parameter.append([weight2, bias7])
        with tf.name_scope('third_fc_layer'):
            weight3 = tf.Variable(
                tf.truncated_normal(shape=[4096, 1000], stddev=0.01),
                dtype=tf.float32,
                name='fc_weight',
                collections=[tf.GraphKeys.GLOBAL_VARIABLES, 'loss'])
            bias8 = tf.Variable(tf.constant(shape=[1000],
                                            value=1,
                                            dtype=tf.float32),
                                name='fc_bias')
            # Keep the pre-softmax activations as logits; the softmax is only
            # applied to obtain the predicted probabilities.
            self.logits = tf.nn.bias_add(tf.matmul(drop2, weight3), bias8)
            self.out = tf.nn.softmax(self.logits)
        self.parameter.append([weight3, bias8])
        with tf.name_scope('loss'):
            regulation_loss = 0
            for i in tf.get_collection('loss'):
                tensor = tf.get_default_graph().get_tensor_by_name(i.name)
                regulation_loss += tf.nn.l2_loss(tensor)
            self.loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(
                    labels=self.input_y, logits=self.logits)) \
                + regulation_loss * regular
            self.train_loss_op = tf.train.GradientDescentOptimizer(
                lr_rate).minimize(self.loss)

        with tf.name_scope('accuracy'):
            self.accuracy = tf.reduce_mean(
                tf.cast(
                    tf.equal(tf.argmax(self.out, axis=1),
                             tf.argmax(self.input_y, axis=1)), tf.float32))
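
# A minimal usage sketch (illustrative only): `model` is assumed to be an
# instance of the class whose __init__ is shown above (the class name itself
# is not shown here), and `images` / `labels` are float32 numpy arrays of
# shape [batch, 224, 224, 3] and [batch, 1000].
def run_training_step(sess, model, images, labels):
    """Run one gradient-descent step and return the batch loss."""
    _, batch_loss = sess.run([model.train_loss_op, model.loss],
                             feed_dict={model.input_x: images,
                                        model.input_y: labels})
    return batch_loss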
Exemplo n.º 25
0
#print(l_t_matrix)
#print(max_val)
#print(non_zero)

# Build model.

num_input = num_users
num_hidden_1 = 10
num_hidden_2 = 5

X = tf.placeholder(tf.float64, [None, num_input])

weights = {
    'encoder_h1':
    tf.Variable(tf.random_normal([num_input, num_hidden_1], dtype=tf.float64)),
    'encoder_h2':
    tf.Variable(
        tf.random_normal([num_hidden_1, num_hidden_2], dtype=tf.float64)),
    'decoder_h1':
    tf.Variable(
        tf.random_normal([num_hidden_2, num_hidden_1], dtype=tf.float64)),
    'decoder_h2':
    tf.Variable(tf.random_normal([num_hidden_1, num_input], dtype=tf.float64)),
}

biases = {
    'encoder_b1':
    tf.Variable(tf.random_normal([num_hidden_1], dtype=tf.float64)),
    'encoder_b2':
    tf.Variable(tf.random_normal([num_hidden_2], dtype=tf.float64)),
Exemplo n.º 26
0
def train(train_list, val_list, debug_mode=True):
    print('Running PRLNet - Training!')
    # create folders to save trained model and results
    graph_dir = './graph'
    checkpt_dir = './model'
    ouput_dir = './output'
    exists_or_mkdir(graph_dir, need_remove=True)
    exists_or_mkdir(ouput_dir)
    exists_or_mkdir(checkpt_dir)

    # --------------------------------- load data ---------------------------------
    # data fetched at range: [-1,1]
    input_imgs, target_imgs, num = input_producer(train_list,
                                                  in_channels,
                                                  batch_size,
                                                  need_shuffle=True)
    if debug_mode:
        input_val, target_val, num_val = input_producer(val_list,
                                                        in_channels,
                                                        batch_size,
                                                        need_shuffle=False)

    pred_content, pred_detail, pred_imgs = gen_PRLNet(input_imgs,
                                                      out_channels,
                                                      is_train=True,
                                                      reuse=False)
    if debug_mode:
        _, _, pred_val = gen_PRLNet(input_val,
                                    out_channels,
                                    is_train=False,
                                    reuse=True)

    # --------------------------------- loss terms ---------------------------------
    with tf.name_scope('Loss') as loss_scp:
        target_224 = tf.image.resize_images(target_imgs,
                                            size=[224, 224],
                                            method=0,
                                            align_corners=False)
        predict_224 = tf.image.resize_images(pred_imgs,
                                             size=[224, 224],
                                             method=0,
                                             align_corners=False)
        vgg19_api = VGG19("vgg19.npy")
        vgg_map_targets = vgg19_api.build((target_224 + 1) / 2,
                                          is_rgb=(in_channels == 3))
        vgg_map_predict = vgg19_api.build((predict_224 + 1) / 2,
                                          is_rgb=(in_channels == 3))

        content_loss = tf.losses.mean_squared_error(target_imgs, pred_content)
        vgg_loss = 2e-6 * tf.losses.mean_squared_error(vgg_map_targets,
                                                       vgg_map_predict)
        l1_loss = tf.reduce_mean(tf.abs(target_imgs - pred_imgs))
        mse_loss = tf.losses.mean_squared_error(target_imgs, pred_imgs)

        loss_op = content_loss + 2 * vgg_loss + l1_loss

    # --------------------------------- solver definition ---------------------------------
    global_step = tf.Variable(0, name='global_step', trainable=False)
    iters_per_epoch = np.floor_divide(num, batch_size)
    lr_decay = tf.train.polynomial_decay(
        learning_rate=learning_rate,
        global_step=global_step,
        decay_steps=iters_per_epoch * n_epochs,
        end_learning_rate=learning_rate / 100.0,
        power=0.9)
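    # With power=0.9 the schedule above follows
    #   lr(step) = (lr0 - lr_end) * (1 - step / decay_steps) ** 0.9 + lr_end,
    # so with lr_end = lr0 / 100, halfway through training the rate is roughly
    # 0.99 * lr0 * 0.5 ** 0.9 + 0.01 * lr0  ~  0.54 * lr0.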

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.name_scope('optimizer'):
        with tf.control_dependencies(update_ops):
            gen_vars = [
                var for var in tf.trainable_variables()
                if var.name.startswith("PRLNet")
            ]
            gen_optim = tf.train.AdamOptimizer(lr_decay, beta1)
            gen_grads_and_vars = gen_optim.compute_gradients(loss_op,
                                                             var_list=gen_vars)
            train_op = gen_optim.apply_gradients(gen_grads_and_vars,
                                                 global_step=global_step)

    # --------------------------------- model training ---------------------------------
    '''
    if debug_mode:
        with tf.name_scope('summarise') as sum_scope:
            tf.summary.scalar('loss', loss_op)
            tf.summary.scalar('learning rate', lr_decay)
            tf.summary.image('predicts', pred_imgs, max_outputs=9)
            summary_op = tf.summary.merge_all()
    '''

    with tf.name_scope("parameter_count"):
        num_parameters = tf.reduce_sum(
            [tf.reduce_prod(tf.shape(v)) for v in tf.trainable_variables()])

    # set GPU resources
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    #config.gpu_options.per_process_gpu_memory_fraction = 0.45

    saver = tf.train.Saver(max_to_keep=1)
    loss_list = []
    psnr_list = []
    with tf.Session(config=config) as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        sess.run(tf.global_variables_initializer())
        print(">>------------>>> [Training_Num] =%d" % num)
        print(">>------------>>> [Parameter_Num] =%d" %
              sess.run(num_parameters))
        '''
        if debug_mode:
            with tf.name_scope(sum_scope):
                summary_writer = tf.summary.FileWriter(graph_dir, graph=sess.graph)
        '''
        for epoch in range(0, n_epochs):
            start_time = time.time()
            epoch_loss, n_iters = 0, 0
            for step in range(0, num, batch_size):
                _, loss = sess.run([train_op, loss_op])
                epoch_loss += loss
                n_iters += 1
                # iteration information
                if n_iters % display_steps == 0:
                    tm = datetime.datetime.now().strftime(
                        '%Y-%m-%d %H:%M:%S.%f')
                    print("%s >> [%d/%d] iter: %d  loss: %4.4f" %
                          (tm, epoch, n_epochs, n_iters, loss))
                    '''
                    if debug_mode:
                        summary_str = sess.run(summary_op)
                        summary_writer.add_summary(summary_str, step)
                    '''

            # epoch information
            epoch_loss = epoch_loss / n_iters
            loss_list.append(epoch_loss)
            print(
                "[*] ----- Epoch: %d/%d | Loss: %4.4f | Time-consumed: %4.3f -----"
                % (epoch, n_epochs, epoch_loss, (time.time() - start_time)))

            if (epoch + 1) % save_epochs == 0:
                if debug_mode:
                    print("----- validating model ...")
                    mean_psnr, nn = 0, 0
                    for idx in range(0, num_val, batch_size):
                        predicts, groundtruths = sess.run(
                            [pred_val, target_val])
                        save_images_from_batch(predicts, ouput_dir, idx)
                        psnr = measure_psnr(predicts, groundtruths)
                        mean_psnr += psnr
                        nn += 1
                    psnr_list.append(mean_psnr / nn)
                    print("----- psnr:%4.4f" % (mean_psnr / nn))

                print("----- saving model  ...")
                saver.save(sess,
                           os.path.join(checkpt_dir, "model.cpkt"),
                           global_step=global_step)
                save_list(os.path.join(ouput_dir, "loss"), loss_list)
                save_list(os.path.join(ouput_dir, "psnr"), psnr_list)

        # stop data queue
        coord.request_stop()
        coord.join(threads)
        # write out the loss list
        save_list(os.path.join(ouput_dir, "loss"), loss_list)
        save_list(os.path.join(ouput_dir, "psnr"), psnr_list)
        print("Training finished!")

    return None
Exemplo n.º 27
0
    def initialise_model(self, numpy_embedding):
        """
        Initialises the TensorFlow Attract-Repel model.
        """
        self.attract_examples = tf.placeholder(
            tf.int32,
            [None, 2])  # each element is the position of word vector.
        self.repel_examples = tf.placeholder(
            tf.int32,
            [None, 2])  # each element is again the position of word vector.

        self.negative_examples_attract = tf.placeholder(tf.int32, [None, 2])
        self.negative_examples_repel = tf.placeholder(tf.int32, [None, 2])

        self.attract_margin = tf.placeholder("float")
        self.repel_margin = tf.placeholder("float")
        self.regularisation_constant = tf.placeholder("float")

        # Initial (distributional) vectors. Needed for L2 regularisation.
        self.W_init = tf.constant(numpy_embedding, name="W_init")

        # Variable storing the updated word vectors.
        self.W_dynamic = tf.Variable(numpy_embedding, name="W_dynamic")

        # Attract Cost Function:

        # placeholders for example pairs...
        attract_examples_left = tf.nn.l2_normalize(
            tf.nn.embedding_lookup(self.W_dynamic,
                                   self.attract_examples[:, 0]), 1)
        attract_examples_right = tf.nn.l2_normalize(
            tf.nn.embedding_lookup(self.W_dynamic,
                                   self.attract_examples[:, 1]), 1)

        # and their respective negative examples:
        negative_examples_attract_left = tf.nn.l2_normalize(
            tf.nn.embedding_lookup(self.W_dynamic,
                                   self.negative_examples_attract[:, 0]), 1)
        negative_examples_attract_right = tf.nn.l2_normalize(
            tf.nn.embedding_lookup(self.W_dynamic,
                                   self.negative_examples_attract[:, 1]), 1)

        # dot product between the example pairs.
        attract_similarity_between_examples = tf.reduce_sum(
            tf.multiply(attract_examples_left, attract_examples_right), 1)

        # dot product of each word in the example with its negative example.
        attract_similarity_to_negatives_left = tf.reduce_sum(
            tf.multiply(attract_examples_left, negative_examples_attract_left),
            1)
        attract_similarity_to_negatives_right = tf.reduce_sum(
            tf.multiply(attract_examples_right,
                        negative_examples_attract_right), 1)

        # and the final Attract Cost Function (sans regularisation):
        self.attract_cost = tf.nn.relu(
            self.attract_margin + attract_similarity_to_negatives_left - attract_similarity_between_examples) + \
                            tf.nn.relu(
                                self.attract_margin + attract_similarity_to_negatives_right - attract_similarity_between_examples)
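        # i.e. a max-margin (hinge) objective: each term is
        #   max(0, margin + sim(word, its negative) - sim(left word, right word)),
        # which pushes paired words to be more similar to each other than to
        # their negative examples.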

        # Repel Cost Function:

        # placeholders for example pairs...
        repel_examples_left = tf.nn.l2_normalize(
            tf.nn.embedding_lookup(self.W_dynamic, self.repel_examples[:, 0]),
            1)  # becomes batch_size X vector_dimension
        repel_examples_right = tf.nn.l2_normalize(
            tf.nn.embedding_lookup(self.W_dynamic, self.repel_examples[:, 1]),
            1)

        # and their respective negative examples:
        negative_examples_repel_left = tf.nn.l2_normalize(
            tf.nn.embedding_lookup(self.W_dynamic,
                                   self.negative_examples_repel[:, 0]), 1)
        negative_examples_repel_right = tf.nn.l2_normalize(
            tf.nn.embedding_lookup(self.W_dynamic,
                                   self.negative_examples_repel[:, 1]), 1)

        # dot product between the example pairs.
        repel_similarity_between_examples = tf.reduce_sum(
            tf.multiply(repel_examples_left, repel_examples_right),
            1)  # becomes batch_size again, might need tf.squeeze

        # dot product of each word in the example with its negative example.
        repel_similarity_to_negatives_left = tf.reduce_sum(
            tf.multiply(repel_examples_left, negative_examples_repel_left), 1)
        repel_similarity_to_negatives_right = tf.reduce_sum(
            tf.multiply(repel_examples_right, negative_examples_repel_right),
            1)

        # and the final Repel Cost Function (sans regularisation):
        self.repel_cost = tf.nn.relu(
            self.repel_margin - repel_similarity_to_negatives_left + repel_similarity_between_examples) + \
                          tf.nn.relu(
                              self.repel_margin - repel_similarity_to_negatives_right + repel_similarity_between_examples)

        # The Regularisation Cost (separate for the two terms, depending on which one is called):

        # load the original distributional vectors for the example pairs:
        original_attract_examples_left = tf.nn.embedding_lookup(
            self.W_init, self.attract_examples[:, 0])
        original_attract_examples_right = tf.nn.embedding_lookup(
            self.W_init, self.attract_examples[:, 1])

        original_repel_examples_left = tf.nn.embedding_lookup(
            self.W_init, self.repel_examples[:, 0])
        original_repel_examples_right = tf.nn.embedding_lookup(
            self.W_init, self.repel_examples[:, 1])

        # and then define the respective regularisation costs:
        regularisation_cost_attract = self.regularisation_constant * (
            tf.nn.l2_loss(original_attract_examples_left -
                          attract_examples_left) +
            tf.nn.l2_loss(original_attract_examples_right -
                          attract_examples_right))
        self.attract_cost += regularisation_cost_attract

        regularisation_cost_repel = self.regularisation_constant * (
            tf.nn.l2_loss(original_repel_examples_left - repel_examples_left) +
            tf.nn.l2_loss(original_repel_examples_right -
                          repel_examples_right))
        self.repel_cost += regularisation_cost_repel

        # Finally, we define the training step functions for both steps.

        tvars = tf.trainable_variables()
        attract_grads = [
            tf.clip_by_value(grad, -2., 2.)
            for grad in tf.gradients(self.attract_cost, tvars)
        ]
        repel_grads = [
            tf.clip_by_value(grad, -2., 2.)
            for grad in tf.gradients(self.repel_cost, tvars)
        ]

        attract_optimiser = tf.train.AdagradOptimizer(0.05)
        repel_optimiser = tf.train.AdagradOptimizer(0.05)

        self.attract_cost_step = attract_optimiser.apply_gradients(
            list(zip(attract_grads, tvars)))
        self.repel_cost_step = repel_optimiser.apply_gradients(
            list(zip(repel_grads, tvars)))

        # return the handles for loading vectors from the TensorFlow embeddings:
        return attract_examples_left, attract_examples_right, repel_examples_left, repel_examples_right
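
# A hedged usage sketch (illustrative only): `model` is assumed to be the
# object whose initialise_model method is shown above; `attract_pairs` and
# `negative_pairs` are int32 arrays of shape [batch, 2] holding word-vector
# indices, and the margin / regularisation defaults are placeholder numbers.
def run_attract_step(sess, model, attract_pairs, negative_pairs,
                     margin=0.6, reg_constant=1e-9):
    """Apply one Adagrad update of the attract cost."""
    sess.run(model.attract_cost_step,
             feed_dict={model.attract_examples: attract_pairs,
                        model.negative_examples_attract: negative_pairs,
                        model.attract_margin: margin,
                        model.regularisation_constant: reg_constant})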
Exemplo n.º 28
0
    def __init__(self,
                 num_classes,
                 placeholders,
                 features,
                 adj,
                 degrees,
                 layer_infos,
                 concat=True,
                 aggregator_type="mean",
                 model_size="small",
                 sigmoid_loss=False,
                 identity_dim=0,
                 **kwargs):
        '''
        Args:
            - placeholders: Stanford TensorFlow placeholder object.
            - features: Numpy array with node features.
            - adj: Numpy array with adjacency lists (padded with random re-samples)
            - degrees: Numpy array with node degrees. 
            - layer_infos: List of SAGEInfo namedtuples that describe the parameters of all 
                   the recursive layers. See SAGEInfo definition above.
            - concat: whether to concatenate during recursive iterations
            - aggregator_type: how to aggregate neighbor information
            - model_size: one of "small" and "big"
            - sigmoid_loss: Set to true if nodes can belong to multiple classes
        '''

        models.GeneralizedModel.__init__(self, **kwargs)

        if aggregator_type == "mean":
            self.aggregator_cls = MeanAggregator
        elif aggregator_type == "seq":
            self.aggregator_cls = SeqAggregator
        elif aggregator_type == "meanpool":
            self.aggregator_cls = MeanPoolingAggregator
        elif aggregator_type == "maxpool":
            self.aggregator_cls = MaxPoolingAggregator
        elif aggregator_type == "gcn":
            self.aggregator_cls = GCNAggregator
        else:
            raise Exception("Unknown aggregator: ", self.aggregator_cls)

        # get info from placeholders...
        self.inputs1 = placeholders["batch"]
        self.model_size = model_size
        self.adj_info = adj
        if identity_dim > 0:
            self.embeds = tf.get_variable(
                "node_embeddings",
                [adj.get_shape().as_list()[0], identity_dim])
        else:
            self.embeds = None
        if features is None:
            if identity_dim == 0:
                raise Exception(
                    "Must have a positive value for identity feature dimension if no input features given."
                )
            self.features = self.embeds
        else:
            self.features = tf.Variable(tf.constant(features,
                                                    dtype=tf.float32),
                                        trainable=False)
            if self.embeds is not None:
                self.features = tf.concat([self.embeds, self.features], axis=1)
        self.degrees = degrees
        self.concat = concat
        self.num_classes = num_classes
        self.sigmoid_loss = sigmoid_loss
        self.dims = [
            (0 if features is None else features.shape[1]) + identity_dim
        ]
        self.dims.extend(
            [layer_infos[i].output_dim for i in range(len(layer_infos))])
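        # e.g. 50-dim input features, identity_dim=0 and two layers with
        # output_dim 128 and 64 give self.dims == [50, 128, 64].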
        self.batch_size = placeholders["batch_size"]
        self.placeholders = placeholders
        self.layer_infos = layer_infos

        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=FLAGS.learning_rate)

        self.build()
Exemplo n.º 29
0
def weight_variable(shape, stddev):
    initial = tf.truncated_normal(shape, stddev=stddev)
    return tf.Variable(initial)
Exemplo n.º 30
0
    def batch_norm_wrapper(self,
                           inputs,
                           batch_name,
                           is_training=True,
                           epsilon=1e-05,
                           decay=0.9):
        """
        Layer to handle batch norm training and inference

        Parameters
        ----------
        inputs: TensorFlow Tensor
            4d tensor of NHWC format
        batch_name: string
            Name for the batch norm layer
        is_training: bool
            True if training and False if running validation; updates based on is_train from params
        epsilon: float
            Small, non-zero value added to variance to avoid divide-by-zero error
        decay: float
            Decay for the moving average


        Returns
        -------
        return: TensorFlow Tensor
            Result of batch norm layer
        """
        dim_of_x = inputs.get_shape()[-1]

        shadow_mean = _tf.Variable(
            _tf.zeros(shape=[dim_of_x], dtype="float32"),
            name=batch_name + "running_mean",
            trainable=False,
        )

        shadow_var = _tf.Variable(
            _tf.ones(shape=[dim_of_x], dtype="float32"),
            name=batch_name + "running_var",
            trainable=False,
        )
        axes = list(range(len(inputs.get_shape()) - 1))

        # Calculate mean and variance for a batch
        batch_mean, batch_var = _tf.nn.moments(inputs, axes, name="moments")

        def mean_var_update():
            with _tf.control_dependencies([
                    _tf.assign(
                        shadow_mean,
                        _tf.multiply(shadow_mean, decay) +
                        _tf.multiply(batch_mean, 1.0 - decay),
                    ),
                    _tf.assign(
                        shadow_var,
                        _tf.multiply(shadow_var, decay) +
                        _tf.multiply(batch_var, 1.0 - decay),
                    ),
            ]):
                return _tf.identity(batch_mean), _tf.identity(batch_var)

        mean, variance = _tf.cond(
            _tf.cast(is_training, _tf.bool),
            mean_var_update,
            lambda: (_tf.identity(shadow_mean), _tf.identity(shadow_var)),
        )
        beta = _tf.Variable(
            _tf.zeros(shape=dim_of_x, dtype="float32"),
            name=batch_name + "beta",
            trainable=True,
        )  # Offset/Shift
        gamma = _tf.Variable(
            _tf.ones(shape=dim_of_x, dtype="float32"),
            name=batch_name + "gamma",
            trainable=True,
        )  # Scale

        return _tf.nn.batch_normalization(inputs, mean, variance, beta, gamma,
                                          epsilon)
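
    # Hedged usage sketch (illustrative only): inside a model that uses this
    # wrapper, a convolution output would typically be normalised as
    #   conv_out = _tf.nn.conv2d(x, kernel, strides=[1, 1, 1, 1], padding="SAME")
    #   normed = self.batch_norm_wrapper(conv_out, "conv1_batch_norm_",
    #                                    is_training=is_train)
    # with the running mean/variance used in place of the batch statistics
    # once is_training is False.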