def train(images, labels, ckpt_path, dropout=False):
    """
    This function contains the loop that actually trains the model.

    The graph (placeholders, inference network, loss, train op and saver) is
    built inside a fresh ``tf.Graph``. Training then runs for
    ``FLAGS.max_steps`` steps, cycling over the data in batches of
    ``FLAGS.batch_size``, and a checkpoint is written every 1000 steps and
    after the final step.

    :param images: a numpy array with the input data (dtype ``np.float32``)
    :param labels: a numpy array with the output labels (dtype ``np.int32``)
    :param ckpt_path: a path (including name) where model checkpoints are saved
    :param dropout: Boolean, whether to use dropout or not
    :return: True if everything went well
    :raises AssertionError: if the inputs fail the sanity checks below or the
        loss becomes NaN during training
    """
    # Check training data
    assert len(images) == len(labels)
    assert images.dtype == np.float32
    assert labels.dtype == np.int32

    # Set default TF graph
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        # Declare data placeholder
        train_data_node = _input_placeholder()

        # Create a placeholder to hold labels
        train_labels_shape = (FLAGS.batch_size,)
        train_labels_node = tf.placeholder(tf.int32, shape=train_labels_shape)

        print("Done Initializing Training Placeholders")

        # Build a Graph that computes the logits predictions from the
        # placeholder
        if FLAGS.deeper:
            logits = inference_deeper(train_data_node, dropout=dropout)
        else:
            logits = inference(train_data_node, dropout=dropout)

        # Calculate loss
        loss = loss_fun(logits, train_labels_node)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = train_op_fun(loss, global_step)

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())

        print("Graph constructed and saver created")

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()

        session_config = tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement)

        # Create and init the session. Using it as a context manager makes
        # sure it is closed (and its resources released) when training ends,
        # including when an assertion or a TF error interrupts the loop.
        with tf.Session(config=session_config) as sess:
            sess.run(init)

            print("Session ready, beginning training loop")

            # Initialize the number of batches
            data_length = len(images)
            nb_batches = math.ceil(data_length / FLAGS.batch_size)

            for step in xrange(FLAGS.max_steps):
                # for debug, save start time
                start_time = time.time()

                # Current batch number
                batch_nb = step % nb_batches

                # Current batch start and end indices
                start, end = utils.batch_indices(batch_nb, data_length,
                                                 FLAGS.batch_size)

                # Prepare dictionary to feed the session with; the index
                # range is shared by the data and the labels.
                batch_range = range(start, end)
                feed_dict = {train_data_node: images[batch_range],
                             train_labels_node: labels[batch_range]}

                # Run training step
                _, loss_value = sess.run([train_op, loss],
                                         feed_dict=feed_dict)

                # Compute duration of training step
                duration = time.time() - start_time

                # Sanity check
                assert not np.isnan(loss_value), \
                    'Model diverged with loss = NaN'

                # Echo loss once in a while
                if step % 100 == 0:
                    num_examples_per_step = FLAGS.batch_size
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = float(duration)

                    format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                                  'sec/batch)')
                    print(format_str % (datetime.now(), step, loss_value,
                                        examples_per_sec, sec_per_batch))

                # Save the model checkpoint periodically.
                if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                    saver.save(sess, ckpt_path, global_step=step)

    return True
def _bias_variable(shape):
    """Return a ``tf.Variable`` of the given shape with every entry set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
def weight_variable(shape, nm):
    """Create a weight variable named ``nm`` with truncated-normal initial values.

    The initial-value tensor (stddev 0.1) is also registered as a histogram
    summary under the name ``nm`` in the ``'always'`` collection.
    """
    init_values = tf.truncated_normal(shape, stddev=0.1)
    # NOTE(review): the histogram is attached to the initializer tensor, not to
    # the variable that is returned -- confirm this is intended.
    tf.summary.histogram(nm, init_values, collections=['always'])
    weights = tf.Variable(init_values, name=nm)
    return weights
Justin Kahr ''' tf.disable_v2_behavior() print(tf.__version__) XORin = [[0,0], [0,1], [1,0], [1,1]] XORout = [[0], [1], [1], [0]] x = tf.placeholder(tf.float32, shape=[4,2]) y = tf.placeholder(tf.float32, shape=[4,1]) # weights w1 = tf.Variable([[1.0, 0.0],[1.0, 0.0]], shape=[2,2]) w2 = tf.Variable([[0.0], [1.0]], shape=[2,1]) # biases b1 = tf.Variable([0.0, 0.0], shape=[2]) b2 = tf.Variable([0.0], shape=1) # forward and back propigation classification = tf.sigmoid(tf.matmul(tf.sigmoid(tf.matmul(x, w1) + b1), w2) + b2) # error e = tf.reduce_mean(tf.squared_difference(y, classification)) train = tf.train.GradientDescentOptimizer(0.1).minimize(e) trainTime = time.time()
import tensorflow.compat.v1 as tf

# Declare the two variables; their shapes must match those stored in
# model.ckpt.
v1 = tf.Variable([11, 12, 13], dtype=tf.float32, name='v1')
v2 = tf.Variable([15, 16], dtype=tf.float32, name='v2')

# Create the tf.train.Saver used to restore the checkpoint.
saver = tf.train.Saver()

# The ``with`` block closes the session on exit, so no explicit
# ``sess.close()`` is needed.
with tf.Session() as sess:
    # Load the variable values stored under ./L2model/model.ckpt
    saver.restore(sess, './L2model/model.ckpt')
    # Print the restored values of both variables.
    print(sess.run(v1))
    print(sess.run(v2))
def train_loop(pipeline_config_path,
               model_dir,
               config_override=None,
               train_steps=None,
               use_tpu=False,
               save_final_config=False,
               checkpoint_every_n=1000,
               checkpoint_max_to_keep=7,
               record_summaries=True,
               **kwargs):
    """Trains a model using eager + functions.

    This method:
      1. Processes the pipeline configs
      2. (Optionally) saves the as-run config
      3. Builds the model & optimizer
      4. Gets the training input data
      5. Loads a fine-tuning detection or classification checkpoint if
         requested
      6. Loops over the train data, executing distributed training steps
         inside tf.functions.
      7. Checkpoints the model every `checkpoint_every_n` training steps.
      8. Logs the training metrics as TensorBoard summaries.

    Args:
      pipeline_config_path: A path to a pipeline config file.
      model_dir: The directory to save checkpoints and summaries to.
      config_override: A pipeline_pb2.TrainEvalPipelineConfig text proto to
        override the config from `pipeline_config_path`.
      train_steps: Number of training steps. If None, the number of training
        steps is set from the `TrainConfig` proto.
      use_tpu: Boolean, whether training and evaluation should run on TPU.
      save_final_config: Whether to save final config (obtained after applying
        overrides) to `model_dir`.
      checkpoint_every_n: Checkpoint every n training steps.
      checkpoint_max_to_keep: int, the number of most recent checkpoints to
        keep in the model directory.
      record_summaries: Boolean, whether or not to record summaries.
      **kwargs: Additional keyword arguments for configuration override.

    Raises:
      ValueError: if `train_config.load_all_detection_checkpoint_vars` is set.
    """
    ## Parse the configs
    get_configs_from_pipeline_file = MODEL_BUILD_UTIL_MAP[
        'get_configs_from_pipeline_file']
    merge_external_params_with_configs = MODEL_BUILD_UTIL_MAP[
        'merge_external_params_with_configs']
    create_pipeline_proto_from_configs = MODEL_BUILD_UTIL_MAP[
        'create_pipeline_proto_from_configs']

    configs = get_configs_from_pipeline_file(pipeline_config_path,
                                             config_override=config_override)
    # Fold the explicit arguments into the override kwargs; bfloat16 is only
    # requested when the config asks for it AND we are running on TPU.
    kwargs.update({
        'train_steps': train_steps,
        'use_bfloat16': configs['train_config'].use_bfloat16 and use_tpu
    })
    configs = merge_external_params_with_configs(configs,
                                                 None,
                                                 kwargs_dict=kwargs)
    model_config = configs['model']
    train_config = configs['train_config']
    train_input_config = configs['train_input_config']

    unpad_groundtruth_tensors = train_config.unpad_groundtruth_tensors
    add_regularization_loss = train_config.add_regularization_loss
    # Gradient clipping stays disabled (None) unless a positive norm is
    # configured.
    clip_gradients_value = None
    if train_config.gradient_clipping_by_norm > 0:
        clip_gradients_value = train_config.gradient_clipping_by_norm

    # update train_steps from config but only when non-zero value is provided
    if train_steps is None and train_config.num_steps != 0:
        train_steps = train_config.num_steps

    if kwargs['use_bfloat16']:
        tf.compat.v2.keras.mixed_precision.experimental.set_policy(
            'mixed_bfloat16')

    if train_config.load_all_detection_checkpoint_vars:
        raise ValueError('train_pb2.load_all_detection_checkpoint_vars '
                         'unsupported in TF2')

    config_util.update_fine_tune_checkpoint_type(train_config)
    fine_tune_checkpoint_type = train_config.fine_tune_checkpoint_type
    fine_tune_checkpoint_version = train_config.fine_tune_checkpoint_version

    # Write the as-run pipeline config to disk.
    if save_final_config:
        pipeline_config_final = create_pipeline_proto_from_configs(configs)
        config_util.save_pipeline_config(pipeline_config_final, model_dir)

    # Build the model, optimizer, and training input
    strategy = tf.compat.v2.distribute.get_strategy()
    with strategy.scope():
        detection_model = model_builder.build(model_config=model_config,
                                              is_training=True)

        def train_dataset_fn(input_context):
            """Callable to create train input."""
            # Create the inputs.
            train_input = inputs.train_input(
                train_config=train_config,
                train_input_config=train_input_config,
                model_config=model_config,
                model=detection_model,
                input_context=input_context)
            # Repeat so the iterator never runs out during the step loop.
            train_input = train_input.repeat()
            return train_input

        train_input = strategy.experimental_distribute_datasets_from_function(
            train_dataset_fn)

        global_step = tf.Variable(
            0,
            trainable=False,
            dtype=tf.compat.v2.dtypes.int64,
            name='global_step',
            aggregation=tf.compat.v2.VariableAggregation.ONLY_FIRST_REPLICA)
        optimizer, (learning_rate, ) = optimizer_builder.build(
            train_config.optimizer, global_step=global_step)

        # Normalise the learning rate to a zero-argument callable so the
        # train step can always call it.
        if callable(learning_rate):
            learning_rate_fn = learning_rate
        else:
            learning_rate_fn = lambda: learning_rate

    ## Train the model
    # Get the appropriate filepath (temporary or not) based on whether the
    # worker is the chief.
    summary_writer_filepath = get_filepath(strategy,
                                           os.path.join(model_dir, 'train'))
    if record_summaries:
        summary_writer = tf.compat.v2.summary.create_file_writer(
            summary_writer_filepath)
    else:
        summary_writer = tf2.summary.create_noop_writer()

    # Number of training steps executed per call of `_dist_train_step`.
    if use_tpu:
        num_steps_per_iteration = 100
    else:
        # TODO(b/135933080) Explore setting to 100 when GPU performance issues
        # are fixed.
        num_steps_per_iteration = 1

    with summary_writer.as_default():
        with strategy.scope():
            # Summaries are only recorded on steps that are a multiple of
            # `num_steps_per_iteration`.
            with tf.compat.v2.summary.record_if(
                    lambda: global_step % num_steps_per_iteration == 0):
                # Load a fine-tuning checkpoint.
                if train_config.fine_tune_checkpoint:
                    load_fine_tune_checkpoint(
                        detection_model, train_config.fine_tune_checkpoint,
                        fine_tune_checkpoint_type,
                        fine_tune_checkpoint_version, train_input,
                        unpad_groundtruth_tensors)

                ckpt = tf.compat.v2.train.Checkpoint(step=global_step,
                                                     model=detection_model,
                                                     optimizer=optimizer)

                manager_dir = get_filepath(strategy, model_dir)
                # When this strategy instance should not checkpoint, keep
                # only a single checkpoint in its manager directory.
                if not strategy.extended.should_checkpoint:
                    checkpoint_max_to_keep = 1
                manager = tf.compat.v2.train.CheckpointManager(
                    ckpt, manager_dir, max_to_keep=checkpoint_max_to_keep)

                # We use the following instead of manager.latest_checkpoint
                # because manager_dir does not point to the model directory
                # when we are running in a worker.
                latest_checkpoint = tf.train.latest_checkpoint(model_dir)
                ckpt.restore(latest_checkpoint)

                def train_step_fn(features, labels):
                    """Single train step."""
                    loss = eager_train_step(
                        detection_model,
                        features,
                        labels,
                        unpad_groundtruth_tensors,
                        optimizer,
                        learning_rate=learning_rate_fn(),
                        add_regularization_loss=add_regularization_loss,
                        clip_gradients_value=clip_gradients_value,
                        global_step=global_step,
                        num_replicas=strategy.num_replicas_in_sync)
                    global_step.assign_add(1)
                    return loss

                def _sample_and_train(strategy, train_step_fn, data_iterator):
                    # Pull one batch and run the train step on every replica.
                    features, labels = data_iterator.next()
                    # `Strategy.run` is used when available; otherwise fall
                    # back to `experimental_run_v2`.
                    if hasattr(tf.distribute.Strategy, 'run'):
                        per_replica_losses = strategy.run(
                            train_step_fn, args=(features, labels))
                    else:
                        per_replica_losses = strategy.experimental_run_v2(
                            train_step_fn, args=(features, labels))
                    # TODO(anjalisridhar): explore if it is safe to remove the
                    ## num_replicas scaling of the loss and switch this to a ReduceOp.Mean
                    return strategy.reduce(tf.distribute.ReduceOp.SUM,
                                           per_replica_losses,
                                           axis=None)

                @tf.function
                def _dist_train_step(data_iterator):
                    """A distributed train step."""
                    # Run all but the last step inside a tf.range loop; only
                    # the loss of the final step is returned.
                    if num_steps_per_iteration > 1:
                        for _ in tf.range(num_steps_per_iteration - 1):
                            # Following suggestion on yaqs/5402607292645376
                            with tf.name_scope(''):
                                _sample_and_train(strategy, train_step_fn,
                                                  data_iterator)

                    return _sample_and_train(strategy, train_step_fn,
                                             data_iterator)

                train_input_iter = iter(train_input)

                # Save an initial checkpoint when starting from step 0.
                if int(global_step.value()) == 0:
                    manager.save()

                checkpointed_step = int(global_step.value())
                logged_step = global_step.value()

                last_step_time = time.time()
                for _ in range(global_step.value(), train_steps,
                               num_steps_per_iteration):

                    loss = _dist_train_step(train_input_iter)

                    time_taken = time.time() - last_step_time
                    last_step_time = time.time()

                    tf.compat.v2.summary.scalar('steps_per_sec',
                                                num_steps_per_iteration * 1.0 /
                                                time_taken,
                                                step=global_step)

                    # Log once at least 100 steps have elapsed since the last
                    # log line.
                    if global_step.value() - logged_step >= 100:
                        tf.logging.info(
                            'Step {} per-step time {:.3f}s loss={:.3f}'.format(
                                global_step.value(),
                                time_taken / num_steps_per_iteration, loss))
                        logged_step = global_step.value()

                    if ((int(global_step.value()) - checkpointed_step) >=
                            checkpoint_every_n):
                        manager.save()
                        checkpointed_step = int(global_step.value())

    # Remove the checkpoint directories of the non-chief workers that
    # MultiWorkerMirroredStrategy forces us to save during sync distributed
    # training.
    clean_temporary_directories(strategy, manager_dir)
    clean_temporary_directories(strategy, summary_writer_filepath)
def build_model(self):
    """Create two scalar variables (1.0 and 2.0) and return their sum."""
    first = tf.Variable(1.0)
    second = tf.Variable(2.0)
    return first + second
def bias_variable(shape):
    """Build a bias variable of the given shape whose entries all start at 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
# Training data: five samples with three features each, and one target value
# per sample.
x_data = [[73., 80., 75.], [93., 88., 93.], [89., 91., 90.], [96., 98., 100.],
          [73., 66., 70.]]
y_data = [[152.], [185.], [180.], [196.], [142.]]

# placeholders for a tensor that will be always fed.
# 3 is the number of elements (columns) [x1, x2, x3]; the number of rows is
# left open (N rows).
X = tf.placeholder(tf.float32, shape=[None, 3])
Y = tf.placeholder(tf.float32, shape=[None, 1])

#                  [ w1 ]
# [x1, x2, x3]  *  [ w2 ]
#                  [ w3 ]
# W has a single column and three rows.
W = tf.Variable(tf.random_normal([3, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis
hypothesis = tf.matmul(X, W) + b

# Simplified cost/loss function (mean squared error)
cost = tf.reduce_mean(tf.square(hypothesis - Y))

# Minimize
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

# Launch the graph in a session.
sess = tf.Session()
# Initializes global variables in the graph.
def detection_loss(cls_outputs, box_outputs, labels, params):
    """Computes total detection loss.

    Computes total detection loss including box and class loss from all
    levels.

    Args:
      cls_outputs: a dict with keys representing levels and values
        representing logits in [batch_size, height, width, num_anchors].
      box_outputs: a dict with keys representing levels and values
        representing box regression targets in
        [batch_size, height, width, num_anchors * 4].
      labels: the dictionary that returned from dataloader that includes
        groundtruth targets.
      params: the dictionary including training parameters.

    Returns:
      total_loss: a tensor representing total loss reducing from class and
        box losses from all levels.
      cls_loss: a tensor representing total class loss.
      box_loss: a tensor representing total box regression loss, or the
        integer 0 when `params['box_loss_weight']` is falsy.
      box_iou_loss: a tensor representing total box iou loss, or the integer
        0 when `params['iou_loss_type']` is falsy.
    """
    # Sum all positives in a batch for normalization and avoid zero
    # num_positives_sum, which would lead to inf loss during training
    num_positives_sum = tf.reduce_sum(labels['mean_num_positives']) + 1.0
    # A missing or None momentum is treated as 0 (no extra normalization).
    positives_momentum = params.get('positives_momentum', None) or 0
    if positives_momentum > 0:
        # normalize the num_positive_examples for training stability.
        moving_normalizer_var = tf.Variable(
            0.0,
            name='moving_normalizer',
            dtype=tf.float32,
            synchronization=tf.VariableSynchronization.ON_READ,
            trainable=False,
            aggregation=tf.VariableAggregation.MEAN)
        num_positives_sum = tf.keras.backend.moving_average_update(
            moving_normalizer_var,
            num_positives_sum,
            momentum=params['positives_momentum'])
    elif positives_momentum < 0:
        # A negative momentum selects the cross-replica mean instead.
        num_positives_sum = utils.cross_replica_mean(num_positives_sum)

    levels = cls_outputs.keys()

    cls_losses = []
    box_losses = []
    for level in levels:
        # Onehot encoding for classification labels.
        cls_targets_at_level = tf.one_hot(labels['cls_targets_%d' % level],
                                          params['num_classes'])

        # Merge the last two axes of the one-hot targets into one; the
        # position of the merged axis depends on the data format.
        if params['data_format'] == 'channels_first':
            bs, _, width, height, _ = cls_targets_at_level.get_shape().as_list(
            )
            cls_targets_at_level = tf.reshape(cls_targets_at_level,
                                              [bs, -1, width, height])
        else:
            bs, width, height, _, _ = cls_targets_at_level.get_shape().as_list(
            )
            cls_targets_at_level = tf.reshape(cls_targets_at_level,
                                              [bs, width, height, -1])
        box_targets_at_level = labels['box_targets_%d' % level]

        cls_loss = focal_loss(cls_outputs[level],
                              cls_targets_at_level,
                              params['alpha'],
                              params['gamma'],
                              normalizer=num_positives_sum,
                              label_smoothing=params['label_smoothing'])

        # Split the class axis back out so the loss can be masked per target.
        if params['data_format'] == 'channels_first':
            cls_loss = tf.reshape(
                cls_loss, [bs, -1, width, height, params['num_classes']])
        else:
            cls_loss = tf.reshape(
                cls_loss, [bs, width, height, -1, params['num_classes']])

        # Zero out the loss wherever the class target equals -2.
        # NOTE(review): presumably -2 marks targets to be ignored -- confirm
        # against the dataloader.
        cls_loss *= tf.cast(
            tf.expand_dims(tf.not_equal(labels['cls_targets_%d' % level], -2),
                           -1), tf.float32)
        # The summed class loss of each level is clipped to [0.0, 2.0].
        cls_losses.append(tf.clip_by_value(tf.reduce_sum(cls_loss), 0.0, 2.0))

        if params['box_loss_weight']:
            box_losses.append(
                _box_loss(box_outputs[level],
                          box_targets_at_level,
                          num_positives_sum,
                          delta=params['delta']))

    if params['iou_loss_type']:
        input_anchors = anchors.Anchors(params['min_level'],
                                        params['max_level'],
                                        params['num_scales'],
                                        params['aspect_ratios'],
                                        params['anchor_scale'],
                                        params['image_size'])
        # Flatten outputs and targets of all levels to rows of 4 values and
        # concatenate them. Note that this rebinds `box_outputs`.
        box_output_list = [tf.reshape(box_outputs[i], [-1, 4]) for i in levels]
        box_outputs = tf.concat(box_output_list, axis=0)
        box_target_list = [
            tf.reshape(labels['box_targets_%d' % level], [-1, 4])
            for level in levels
        ]
        box_targets = tf.concat(box_target_list, axis=0)
        # Repeat the anchor boxes once per example in the batch.
        anchor_boxes = tf.tile(input_anchors.boxes, [params['batch_size'], 1])
        box_outputs = anchors.decode_box_outputs(box_outputs, anchor_boxes)
        box_targets = anchors.decode_box_outputs(box_targets, anchor_boxes)
        box_iou_loss = _box_iou_loss(box_outputs, box_targets,
                                     num_positives_sum,
                                     params['iou_loss_type'])

    else:
        box_iou_loss = 0

    # Sum per level losses to total loss.
    cls_loss = tf.add_n(cls_losses)
    box_loss = tf.add_n(box_losses) if box_losses else 0

    total_loss = (cls_loss + params['box_loss_weight'] * box_loss +
                  params['iou_loss_weight'] * box_iou_loss)

    return total_loss, cls_loss, box_loss, box_iou_loss
def weight_variable(shape):
    """Create a weight variable with appropriate initialization.

    Initial values are drawn from a truncated normal distribution with a
    standard deviation of 0.1.
    """
    # Initialization of the weights and biases.
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def __init__(self, dataset, hparams, forward_only=False):
    """Copy dataset/hyper-parameter settings and build the model graph.

    Args:
      dataset: object providing the sizes and distributions copied onto
        `self` below (vocab/review/user/product sizes, etc.).
      hparams: object providing the hyper-parameters copied onto `self`
        below.
      forward_only: if False, gradient-clipped SGD update ops are built;
        if True, product-scoring ops are built instead.
    """
    # dataset parameters
    self.dataset = dataset
    self.vocab_size = self.dataset.vocab_size
    self.review_size = self.dataset.review_size
    self.user_size = self.dataset.user_size
    self.product_size = self.dataset.product_size
    self.query_max_length = self.dataset.query_max_length
    self.vocab_distribute = self.dataset.vocab_distribute
    self.review_distribute = self.dataset.review_distribute
    self.product_distribute = self.dataset.product_distribute

    # hyper-parameters
    self.hparams = hparams
    self.negative_sample = self.hparams.negative_sample
    self.embed_size = self.hparams.embed_size
    self.window_size = self.hparams.window_size
    self.max_gradient_norm = self.hparams.max_gradient_norm
    self.init_learning_rate = self.hparams.init_learning_rate
    self.L2_lambda = self.hparams.L2_lambda
    self.net_struct = self.hparams.net_struct
    self.similarity_func = self.hparams.similarity_func
    self.query_weight = self.hparams.query_weight
    self.global_step = tf.Variable(0, trainable=False)
    self.print_ops = []
    # A non-negative query_weight is used as a fixed (non-trainable) value;
    # otherwise the weight is a trainable variable passed through a sigmoid.
    if self.query_weight >= 0:
        self.Wu = tf.Variable(self.query_weight,
                              name="user_weight",
                              dtype=tf.float32,
                              trainable=False)
    else:
        self.Wu = tf.sigmoid(
            tf.Variable(0, name="user_weight", dtype=tf.float32))

    # create placeholders
    self._create_placeholder()

    # specify model structure
    logging.info("Model Name " + self.net_struct)
    self.need_review = True
    if 'simplified' in self.net_struct:
        print('Simplified model')
        self.need_review = False

    self.need_context = False
    if 'hdc' in self.net_struct:
        print('Use context words')
        self.need_context = True

    # The 'LSE' structure overrides both flags.
    if 'LSE' == self.net_struct:
        self.need_review = False
        self.need_context = True

    # One int64 placeholder per context position, 2 * window_size in total.
    if self.need_context:
        self.context_word_idxs = []
        for i in xrange(2 * self.window_size):
            self.context_word_idxs.append(
                tf.placeholder(tf.int64,
                               shape=[None],
                               name="context_idx{0}".format(i)))

    # Training losses.
    self.loss = None
    if 'LSE' == self.net_struct:
        self.loss = LSE.build_embedding_graph_and_loss(self)
    else:
        self.loss = HEM_builder.build_embedding_graph_and_loss(self)

    # Gradients and SGD update operation for training the model.
    # NOTE(review): self.learning_rate is not assigned in this method;
    # presumably it is created by _create_placeholder() or the graph
    # builders above -- confirm.
    params = tf.trainable_variables()
    if not forward_only:
        opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        self.gradients = tf.gradients(self.loss, params)

        # Clip the gradients by their global norm before applying them.
        self.clipped_gradients, self.norm = tf.clip_by_global_norm(
            self.gradients, self.max_gradient_norm)
        self.updates = opt.apply_gradients(zip(self.clipped_gradients, params),
                                           global_step=self.global_step)

        #self.updates = opt.apply_gradients(zip(self.gradients, params),
        #                                   global_step=self.global_step)
    else:
        if 'LSE' == self.net_struct:
            self.product_scores = LSE.get_product_scores(
                self, self.query_word_idxs)
        else:
            self.product_scores = HEM_builder.get_product_scores(
                self, self.user_idxs, self.query_word_idxs)

    # Add tf.summary scalar
    tf.summary.scalar('Learning_rate',
                      self.learning_rate,
                      collections=['train'])
    tf.summary.scalar('Loss', self.loss, collections=['train'])
    self.train_summary = tf.summary.merge_all(key='train')
    self.saver = tf.train.Saver(tf.global_variables())
v = DictVectorizer() X_train = v.fit_transform(train_data) X_test = v.transform(test_data) X_train = X_train[:1000, :] y_train = y_train[:1000, :] n, p = X_train.shape # number of latent factors k = 5 # design matrix X = tf.placeholder(tf.float32, shape=[n, p]) # target vector y = tf.placeholder(tf.float32, shape=[n, 1]) # bias and weights w0 = tf.Variable(tf.zeros([1])) W = tf.Variable(tf.zeros([p])) # interaction factors, randomly initialized V = tf.Variable(tf.random_normal([k, p], stddev=0.01)) # V # estimate of y, initialized to 0. y_hat = tf.Variable(tf.zeros([n, 1])) linear_terms = tf.add(w0, tf.reduce_sum(tf.multiply(W, X), 1, keepdims=True)) term1 = tf.pow(tf.matmul(X, tf.transpose(V)), 2) term2 = tf.matmul(tf.pow(X, 2), tf.pow(tf.transpose(V), 2)) interactions = tf.reduce_sum(tf.subtract(term1, term2), 1, keepdims=True) # L2 regularized sum of squares loss function over W and V lambda_w = tf.constant(0.001, name='lambda_w') lambda_v = tf.constant(0.001, name='lambda_v') l2_norm = tf.add(tf.reduce_sum(tf.multiply(lambda_w, tf.pow(W, 2))),
import tensorflow.compat.v1 as tf tf.disable_v2_behavior() tf.set_random_seed(777) x_data = [[1, 2], [2, 3], [3, 1], [4, 3], [5, 2], [6, 2]] y_data = [[0], [0], [0], [1], [1], [1]] x = tf.placeholder(dtype=tf.float32, shape=[None, 2]) # none은 앞에 들어오는건 상관없이 쓴다. batch size 맞춰주기 위해서. y = tf.placeholder(dtype=tf.float32, shape=[None, 1]) w = tf.Variable(tf.random_normal([2, 1])) b = tf.Variable(tf.random_normal([1])) hypothesis = tf.sigmoid(tf.matmul(x, w) + b) # x1w1 + x2w2 + b -> # 1/1+e^-(ax+b) cost = -tf.reduce_mean(y * tf.log(hypothesis) + (1 - y) * tf.log(1 - hypothesis)) # cross entropy update = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost) prediction = tf.cast(hypothesis > 0.5, dtype=tf.float32) # 크면 1 작으면 0 accuracy = tf.reduce_mean(tf.cast(tf.equal(prediction, y), dtype=tf.float32)) # casting 한다. 같으면 1 다르면 0 with tf.Session() as sess: sess.run(tf.global_variables_initializer()) for epoch in range(10000): _cost, _ = sess.run([cost, update], feed_dict={x: x_data, y: y_data}) if epoch % 200 == 0: print('epoch:{} cost:{}'.format(epoch, _cost)) _h, _p, _a = sess.run([hypothesis, prediction, accuracy], feed_dict={
# 네트워크 파라미터 설정 img_size = 256 regulation = 0.01 learning_rate = 0.001 batch_size = 10 dropout_rate = 0.1 # Placeholder 선언 input_data = tf.placeholder(tf.float32, [None, img_size, img_size, 3], name='input_data') input_label = tf.placeholder(tf.float32, [None, img_size, img_size, 3], name='input_label') # 가변 파라미터 설정 W1 = tf.Variable(tf.random_normal([3, 3, 3, 128], dtype=tf.float32, stddev=0.01), name='W_encoder1') b1 = tf.Variable(tf.zeros([128], dtype=tf.float32), name='b_encoder1') W2 = tf.Variable(tf.random_normal([3, 3, 128, 64], dtype=tf.float32, stddev=0.01), name='W_encoder2') b2 = tf.Variable(tf.zeros([64], dtype=tf.float32), name='b_encoder2') W3 = tf.Variable(tf.random_normal([3, 3, 64, 32], dtype=tf.float32, stddev=0.03), name='W_encoder3') b3 = tf.Variable(tf.zeros([32], dtype=tf.float32), name='b_encoder3') W4 = tf.Variable(tf.random_normal([3, 3, 32, 32], dtype=tf.float32, stddev=0.05),
from rlcard.utils.logger import plot # Make environment env = rlcard.make('no-limit-holdem') eval_env = rlcard.make('no-limit-holdem') # Set a global seed set_global_seed(0) ### Step 2: Initialize the NFSP agents. ### import tensorflow.compat.v1 as tf from rlcard.agents.nfsp_agent import NFSPAgent tf.disable_v2_behavior() memory_init_size = 1000 norm_step = 100 with tf.Session() as sess: # Set agents global_step = tf.Variable(0, name='global_step', trainable=False) agents = [] for i in range(env.player_num): agent = NFSPAgent(sess, scope='nfsp' + str(i), action_num=env.action_num, state_shape=env.state_shape, hidden_layers_sizes=[128, 128], min_buffer_size_to_learn=1000, q_replay_memory_init_size=memory_init_size, q_update_target_estimator_every=norm_step, q_mlp_layers=[128, 128]) agents.append(agent) # with sess.as_default(): #uncomment when loading # saver = tf.train.Saver() # saver.restore(sess, tf.train.latest_checkpoint(save_dir))
'''
SFFF       (S = start point, safe)
FHFH       (F = Frozen surface, safe)
FFFH       (H = hole)
HFFG       (G = goal, target)
'''
tf.disable_v2_behavior()

env = gym.make('FrozenLake-v0')

tf.reset_default_graph()

# These lines establish the feed-forward part of the network used to choose
# actions: a 1x16 input row is multiplied by a 16x4 weight matrix, and the
# index of the largest of the four outputs is the predicted action.
inputs1 = tf.placeholder(shape=[1, 16], dtype=tf.float32)
W = tf.Variable(tf.random_uniform([16, 4], 0, 0.01))
Qout = tf.matmul(inputs1, W)
predict = tf.argmax(Qout, 1)

# Below we obtain the loss by taking the sum of squares difference between the
# target and prediction Q values.
nextQ = tf.placeholder(shape=[1, 4], dtype=tf.float32)
loss = tf.reduce_sum(tf.square(nextQ - Qout))
trainer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
updateModel = trainer.minimize(loss)

# tf.initialize_all_variables() is deprecated; global_variables_initializer()
# is its documented replacement and creates the same initialization op.
init = tf.global_variables_initializer()

# Set learning parameters
# NOTE(review): presumably y is the discount factor and e the exploration
# rate -- confirm against the training loop that uses them.
y = .99
e = 0.1
num_episodes = 2000
def backword(mnist):
    """Build the training graph for the MNIST model and run the training loop.

    The loss is the mean softmax cross-entropy plus everything in the
    ``'losses'`` collection. Training uses gradient descent with an
    exponentially decaying learning rate and maintains moving averages of the
    trainable variables. An existing checkpoint under ``MODEL_SAVE_PATH`` is
    restored before training; progress is printed and a checkpoint is saved
    every 1000 iterations.

    :param mnist: dataset object providing ``train.num_examples`` and
        ``train.next_batch``
    """
    # Placeholders for the training inputs x and the labels y_.
    x = tf.placeholder(tf.float32, [None, mnist_forward.INPUT_NODE])
    y_ = tf.placeholder(tf.float32, [None, mnist_forward.OUTPUT_NODE])

    # Forward pass (with the regularizer setting) producing the prediction y.
    y = mnist_forward.forward(x, REGULARIZER)

    # Step counter; not trainable.
    global_step = tf.Variable(0, trainable=False)

    # Cross-entropy loss. Cross-entropy is normally used together with
    # softmax, so tf.nn.sparse_softmax_cross_entropy_with_logits bundles both
    # and speeds up the computation. Its logits argument is the forward-pass
    # result without a softmax layer; its labels argument is the index of the
    # correct class, obtained here with argmax over the label vectors.
    label_ids = tf.argmax(y_, 1)
    per_example_ce = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=label_ids)
    # Mean cross-entropy over all examples in the current batch.
    mean_ce = tf.reduce_mean(per_example_ce)
    # Total loss = cross-entropy loss + regularization losses.
    regularization = tf.add_n(tf.get_collection('losses'))
    loss = mean_ce + regularization

    # Exponentially decaying learning rate.
    decay_steps = mnist.train.num_examples / BATCH_SIZE
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE,
                                               global_step,
                                               decay_steps,
                                               LEARNING_RATE_DECAY,
                                               staircase=True)

    # Back-propagation step.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train_step = optimizer.minimize(loss, global_step=global_step)

    # Moving average: shadow values that track the average of every trainable
    # parameter over time; MOVING_AVERAGE_DECAY is the decay rate.
    ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    ema_op = ema.apply(tf.trainable_variables())

    # Each training iteration must both update the parameters through
    # back-propagation and update their moving averages; the control
    # dependency groups the two into a single op.
    with tf.control_dependencies([train_step, ema_op]):
        train_op = tf.no_op(name='train')

    saver = tf.train.Saver()
    fetches = [train_op, loss, global_step]

    with tf.Session() as sess:
        # Initialize all variables.
        tf.global_variables_initializer().run()

        # Resume from the latest checkpoint when one exists.
        ckpt_state = tf.train.get_checkpoint_state(MODEL_SAVE_PATH)
        if ckpt_state and ckpt_state.model_checkpoint_path:
            saver.restore(sess, ckpt_state.model_checkpoint_path)

        for i in range(STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            _, loss_value, step = sess.run(fetches, feed_dict={x: xs, y_: ys})
            if i % 1000 != 0:
                continue
            print("After %d training steps,loss on training batch is %g." %
                  (step, loss_value))
            saver.save(sess,
                       os.path.join(MODEL_SAVE_PATH, MODEL_NAME),
                       global_step=global_step)
from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets("MINIST_data/", one_hot=True) import pylab import tensorflow.compat.v1 as tf tf.disable_eager_execution() tf.reset_default_graph() x = tf.placeholder(tf.float32, [None, 784]) y = tf.placeholder(tf.float32, [None, 10]) W = tf.Variable(tf.random_normal([784, 10])) b = tf.Variable(tf.zeros([10])) pred = tf.nn.softmax(tf.matmul(x, W) + b) cost = tf.reduce_mean(-tf.reduce_sum(y * tf.log(pred), reduction_indices=1)) learning_rate = 0.01 optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost) training_epochs = 25 batch_size = 100 display_step = 1 saver = tf.train.Saver(max_to_keep=1) savedir = "model/" fileprefix = "handwriting.ckpt" with tf.Session() as sess:
train_labels = labels[:, :1000] test_labels = labels[:, 1000:] # 定义激活函数 active_func = tf.nn.sigmoid # active_func = tf.nn.relu # 输入为 14*n,每列为一个样本;输出为 1*n,每列为对应预测值 x = tf.placeholder(dtype=tf.float64, shape=(f_num, None), name='x') y = tf.placeholder(dtype=tf.float64, shape=(1, None), name='y') # 第一层8个神经元 8 * 14 l1_node = 8 layer1W = tf.Variable(tf.random_normal([l1_node, f_num], stddev=1, dtype=tf.float64), name='layer1Weights', dtype=tf.float64) layer1B = tf.Variable(tf.random_normal([l1_node, 1], stddev=1, dtype=tf.float64), name='layer1Bias', dtype=tf.float64) l1Output = tf.matmul(layer1W, x) + layer1B l1Output = active_func(l1Output) # 第二层6个神经元 6 * 8 l2_node = 6 layer2W = tf.Variable(tf.random_normal([l2_node, l1_node], stddev=1, dtype=tf.float64),
batch_size = 7 #how many windows of data we are passing at once window_size = 7 #how big window_size is (Or How many days do we consider to predict next point in the sequence) hidden_layer = 256 #How many units do we use in LSTM cell clip_margin = 4 #To prevent exploding gradient, we use clipper to clip gradients below -margin or above this margin learning_rate = 0.001 #This is a an optimization method that aims to reduce the loss function. #Learning Rate is a parameter of the Gradient Descent algorithm which helps us control #the change of weights for our network to the loss of gradient. epochs = 200 #one forward pass and one backward pass of all the training examples, This is the number of iterations (forward and back propagation) our model needs to make. #Placeholders allows us to send different data within our network with the tf.placeholder() command. inputs = tf.placeholder(tf.float32, [batch_size, window_size, 1]) targets = tf.placeholder(tf.float32, [batch_size, 1]) print("input shape:", inputs.shape) print("target shape:", targets.shape) #Output layer weigts weights_output = tf.Variable( tf.truncated_normal([hidden_layer, 1], stddev=0.05)) bias_output_layer = tf.Variable(tf.zeros([1])) #perform forward propagation to predict the output. # A list is initialized to store the predicted output outputs = [] #for each iteration output is computed and stored in the outputs list for i in range( batch_size ): # Iterates through every window in the batch. The Batch Size refers to the number of training samples propagated through the network # for each batch creating batch_state as all zeros and output for that window which is all zeros at the beginning as well. #initialize hidden state and cell state. np.zeros() Return a new array of given shape and type, filled with zeros. cell_state = np.zeros([1, hidden_layer], dtype=np.float32) hidden_state = np.zeros([1, hidden_layer], dtype=np.float32) #print("hidden state:", hidden_state)
def __init__(self, lr_rate=0.001, regular=0.005, trainable=False):
    """Build the two-branch AlexNet graph, its loss, train op and accuracy.

    Images enter through ``self.input_x`` (227x227x3) and one-hot labels
    through ``self.input_y`` (1000 classes). Convolution layers 1, 2, 4 and
    5 are built as two parallel branches ("part1"/"part2"), layer 3 reads
    both branches, and the two branches are concatenated before the three
    fully connected layers.

    :param lr_rate: learning rate of the gradient-descent optimizer
    :param regular: factor applied to the summed L2 penalty of every
        variable registered in the ``'loss'`` graph collection
    :param trainable: when True, dropout (rate 0.5) follows each of the
        first two fully connected layers; when False no dropout is added

    After construction the instance exposes ``parameter`` (list of
    per-layer variable pairs), ``out`` (softmax probabilities), ``loss``,
    ``train_loss_op`` and ``accuracy``.
    """

    def make_weight(shape, name='kernel_weight'):
        # Weights are also registered in the 'loss' collection so that the
        # L2 penalty at the end of this method picks them up.
        return tf.Variable(
            tf.truncated_normal(shape=shape, stddev=0.01, dtype=tf.float32),
            name=name,
            collections=[tf.GraphKeys.GLOBAL_VARIABLES, 'loss'])

    def make_bias(size, value, name='kernel_bias'):
        # Biases are plain variables; they are not L2-regularised.
        return tf.Variable(
            tf.constant(value=value, shape=[size], dtype=tf.float32),
            name=name)

    def conv2d(inputs, kernel, stride=1, padding='SAME'):
        return tf.nn.conv2d(inputs, kernel,
                            strides=[1, stride, stride, 1],
                            padding=padding)

    def conv_relu(inputs, kernel, bias, stride=1, padding='SAME'):
        return tf.nn.relu(
            tf.nn.bias_add(conv2d(inputs, kernel, stride, padding), bias))

    def lrn(inputs):
        return tf.nn.local_response_normalization(
            inputs, depth_radius=2, bias=1, alpha=2e-05, beta=0.75)

    def max_pool(inputs):
        return tf.nn.max_pool(inputs,
                              ksize=[1, 3, 3, 1],
                              strides=[1, 2, 2, 1],
                              padding='VALID')

    self.parameter = []
    with tf.name_scope('input_layer'):
        self.input_x = tf.placeholder(dtype=tf.float32,
                                      shape=[None, 227, 227, 3],
                                      name='input_x')
        self.input_y = tf.placeholder(dtype=tf.float32,
                                      shape=[None, 1000],
                                      name='input_y')

    with tf.name_scope('first_conv_layer_part1'):
        kernel1_1 = make_weight([11, 11, 3, 48])
        bias1_1 = make_bias(48, 0)
        conv1_1 = conv_relu(self.input_x, kernel1_1, bias1_1,
                            stride=4, padding='VALID')
        lrn1_1 = lrn(conv1_1)
        self.parameter.append([kernel1_1, bias1_1])
    with tf.name_scope('first_conv_layer_part2'):
        kernel1_2 = make_weight([11, 11, 3, 48])
        bias1_2 = make_bias(48, 0)
        conv1_2 = conv_relu(self.input_x, kernel1_2, bias1_2,
                            stride=4, padding='VALID')
        lrn1_2 = lrn(conv1_2)
        self.parameter.append([kernel1_2, bias1_2])
    with tf.name_scope('first_maxpool_layer_part1'):
        maxpool1_1 = max_pool(lrn1_1)
    # The misspelt scope name is kept: it is part of the graph's op names.
    with tf.name_scope('fisrt_maxpool_layer_part2'):
        maxpool1_2 = max_pool(lrn1_2)

    with tf.name_scope('second_conv_layer_part1'):
        kernel2_1 = make_weight([5, 5, 48, 128])
        bias2_1 = make_bias(128, 1)
        conv2_1 = conv_relu(maxpool1_1, kernel2_1, bias2_1)
        lrn2_1 = lrn(conv2_1)
        self.parameter.append([kernel2_1, bias2_1])
    with tf.name_scope('second_conv_layer_part2'):
        kernel2_2 = make_weight([5, 5, 48, 128])
        # Initialised to 1 like bias2_1: the two branches of a layer are
        # symmetric (this one previously started at 0).
        bias2_2 = make_bias(128, 1)
        conv2_2 = conv_relu(maxpool1_2, kernel2_2, bias2_2)
        lrn2_2 = lrn(conv2_2)
        self.parameter.append([kernel2_2, bias2_2])
    with tf.name_scope('second_maxpool_layer_part1'):
        maxpool2_1 = max_pool(lrn2_1)
    with tf.name_scope('second_maxpool_layer_part2'):
        maxpool2_2 = max_pool(lrn2_2)

    # Layer 3 is the only layer where each output half sees both branches.
    # It is written as four [3, 3, 128, 192] kernels whose per-branch results
    # are summed, which matches the single [3, 3, 256, 384] kernel used by
    # many AlexNet implementations (128 x 2 = 256, 192 x 2 = 384).
    with tf.name_scope('third_conv_layer_part1'):
        kernel3_1 = make_weight([3, 3, 128, 192], name='kernel_weight1')
        conv3_1 = conv2d(maxpool2_1, kernel3_1)
        kernel3_2 = make_weight([3, 3, 128, 192], name='kernel_weight2')
        conv3_2 = conv2d(maxpool2_1, kernel3_2)
        self.parameter.append([kernel3_1, kernel3_2])
    with tf.name_scope('third_conv_layer_part2'):
        kernel3_3 = make_weight([3, 3, 128, 192], name='kernel_weight1')
        conv3_3 = conv2d(maxpool2_2, kernel3_3)
        kernel3_4 = make_weight([3, 3, 128, 192], name='kernel_weight2')
        conv3_4 = conv2d(maxpool2_2, kernel3_4)
        self.parameter.append([kernel3_3, kernel3_4])
    with tf.name_scope('make_two_as_one'):
        bias3_1 = make_bias(192, 1, name='bias3_1')
        # NOTE(review): the second bias reuses the name 'bias3_1' (TensorFlow
        # de-duplicates it). Left as is because renaming changes the names
        # stored in existing checkpoints.
        bias3_2 = make_bias(192, 1, name='bias3_1')
        # NOTE(review): the bias is added after the ReLU here, unlike every
        # other layer. Left unchanged so trained weights keep their meaning.
        conv3_out1 = tf.nn.bias_add(tf.nn.relu(conv3_1 + conv3_3), bias3_1)
        conv3_out2 = tf.nn.bias_add(tf.nn.relu(conv3_2 + conv3_4), bias3_2)
        self.parameter.append([bias3_1, bias3_2])

    with tf.name_scope('fourth_conv_layer_part1'):
        kernel4_1 = make_weight([3, 3, 192, 192])
        bias4_1 = make_bias(192, 1)
        conv4_1 = conv_relu(conv3_out1, kernel4_1, bias4_1)
        self.parameter.append([kernel4_1, bias4_1])
    with tf.name_scope('fourth_conv_layer_part2'):
        kernel4_2 = make_weight([3, 3, 192, 192])
        bias4_2 = make_bias(192, 1)
        conv4_2 = conv_relu(conv3_out2, kernel4_2, bias4_2)
        self.parameter.append([kernel4_2, bias4_2])

    with tf.name_scope('fifth_conv_layer_part1'):
        kernel5_1 = make_weight([3, 3, 192, 128])
        bias5_1 = make_bias(128, 1)
        conv5_1 = conv_relu(conv4_1, kernel5_1, bias5_1)
        self.parameter.append([kernel5_1, bias5_1])
    with tf.name_scope('fifth_conv_layer_part2'):
        kernel5_2 = make_weight([3, 3, 192, 128])
        bias5_2 = make_bias(128, 1)
        conv5_2 = conv_relu(conv4_2, kernel5_2, bias5_2)
        self.parameter.append([kernel5_2, bias5_2])
    with tf.name_scope('fifth_maxpool_layer_part1'):
        maxpool5_1 = max_pool(conv5_1)
    with tf.name_scope('fifth_maxpool_layer_part2'):
        maxpool5_2 = max_pool(conv5_2)

    # Join the two branches on the channel axis and flatten for the FC layers.
    conv_out = tf.concat([maxpool5_1, maxpool5_2], 3)
    dim_list = conv_out.get_shape().as_list()[1:]
    shape_dim = np.prod(dim_list)
    reshaped = tf.reshape(conv_out, [-1, shape_dim])

    with tf.name_scope('first_fc_layer'):
        weight1 = make_weight([shape_dim, 4096], name='fc_weight')
        bias6 = make_bias(4096, 1, name='fc_bias')
        fc1_out = tf.nn.relu(
            tf.nn.bias_add(tf.matmul(reshaped, weight1), bias6))
        if trainable:
            fc1_out = tf.nn.dropout(fc1_out, rate=0.5)
        self.parameter.append([weight1, bias6])
    with tf.name_scope('second_fc_layer'):
        weight2 = make_weight([4096, 4096], name='fc_weight')
        bias7 = make_bias(4096, 1, name='fc_bias')
        fc2_out = tf.nn.relu(
            tf.nn.bias_add(tf.matmul(fc1_out, weight2), bias7))
        if trainable:
            fc2_out = tf.nn.dropout(fc2_out, rate=0.5)
        self.parameter.append([weight2, bias7])
    # The misspelt scope name is kept: it is part of the variable names.
    with tf.name_scope('thrid_fc_layer'):
        weight3 = make_weight([4096, 1000], name='fc_weight')
        bias8 = make_bias(1000, 1, name='fc_bias')
        # Keep the raw scores: the cross-entropy op below applies its own
        # softmax, so it must not be fed self.out.
        logits = tf.nn.bias_add(tf.matmul(fc2_out, weight3), bias8)
        self.out = tf.nn.softmax(logits)
        self.parameter.append([weight3, bias8])

    with tf.name_scope('loss'):
        # The 'loss' collection is graph-wide: it holds every weight created
        # by make_weight, plus anything else the graph put under that key.
        regulation_loss = tf.add_n(
            [tf.nn.l2_loss(var) for var in tf.get_collection('loss')])
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=self.input_y, logits=logits)
        self.loss = tf.reduce_mean(cross_entropy) + regulation_loss * regular
        self.train_loss_op = tf.train.GradientDescentOptimizer(
            lr_rate).minimize(self.loss)
    with tf.name_scope('accuracy'):
        correct = tf.equal(tf.argmax(self.out, axis=1),
                           tf.argmax(self.input_y, axis=1))
        self.accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
# init W_skipconn2even channel input k = 0 for j in range(0, code_PCM.shape[1], 1): for i in range(0, code_PCM.shape[0], 1): if (code_PCM[i, j] == 1): W_skipconn2even[j, k] = 1.0 k += 1 ############################## bulid four neural networks(Z = 16,3, 10, 6) ############################ net_dict = {} # init the learnable network parameters Weights_Var = np.ones(sum_edge, dtype=np.float32) Biases_Var = np.zeros(sum_edge, dtype=np.float32) for i in range(0, iters_max, 1): net_dict["Weights_Var{0}".format(i)] = tf.Variable( Weights_Var.copy(), name="Weights_Var".format(i)) net_dict["Biases_Var{0}".format(i)] = tf.Variable( Biases_Var.copy(), name="Biases_Var".format(i)) # the decoding neural network of Z=16 Z = 16 xa = tf.placeholder(tf.float32, shape=[batch_size, N, Z], name='xa') ya = tf.placeholder(tf.float32, shape=[batch_size, N * Z], name='ya') xa_input = tf.transpose(xa, [0, 2, 1]) net_dict["LLRa{0}".format(0)] = tf.zeros((batch_size, Z, sum_edge), dtype=tf.float32) for i in range(0, iters_max, 1): #variable node update x0 = tf.matmul(xa_input, W_skipconn2even) x1 = tf.matmul(net_dict["LLRa{0}".format(i)], W_odd2even) x2 = tf.add(x0, x1)
def __init__(self, lr_rate=0.001, regular=0.005):
    """Build the single-tower AlexNet graph, its loss, train op and accuracy.

    Images enter through ``self.input_x`` (224x224x3) and one-hot labels
    through ``self.input_y`` (1000 classes). The graph has five convolution
    layers, max pooling after the first two, and three fully connected
    layers.

    :param lr_rate: learning rate of the gradient-descent optimizer
    :param regular: factor applied to the summed L2 penalty of every
        variable registered in the ``'loss'`` graph collection

    After construction the instance exposes ``parameter`` (list of
    per-layer ``[weight, bias]`` pairs), ``out`` (softmax probabilities),
    ``loss``, ``train_loss_op`` and ``accuracy``.
    """

    def make_weight(shape, name='kernel_weight'):
        # Weights are also registered in the 'loss' collection so that the
        # L2 penalty at the end of this method picks them up.
        return tf.Variable(
            tf.truncated_normal(shape=shape, stddev=0.01, dtype=tf.float32),
            name=name,
            collections=[tf.GraphKeys.GLOBAL_VARIABLES, 'loss'])

    def make_bias(size, name='kernel_bias'):
        # Every bias in this network starts at 1 and is not L2-regularised.
        return tf.Variable(
            tf.constant(value=1, shape=[size], dtype=tf.float32),
            name=name)

    def conv_relu(inputs, kernel, bias, stride=1, padding='SAME'):
        return tf.nn.relu(
            tf.nn.bias_add(
                tf.nn.conv2d(inputs, kernel,
                             strides=[1, stride, stride, 1],
                             padding=padding),
                bias))

    def max_pool(inputs):
        return tf.nn.max_pool(inputs,
                              ksize=[1, 3, 3, 1],
                              strides=[1, 2, 2, 1],
                              padding='VALID')

    self.parameter = []
    with tf.name_scope('input_layer'):
        self.input_x = tf.placeholder(dtype=tf.float32,
                                      shape=[None, 224, 224, 3],
                                      name='input_x')
        self.input_y = tf.placeholder(dtype=tf.float32,
                                      shape=[None, 1000],
                                      name='input_y')

    with tf.name_scope('first_conv_layer'):
        kernel = make_weight([11, 11, 3, 96])
        bias = make_bias(96)
        conv1 = conv_relu(self.input_x, kernel, bias,
                          stride=4, padding='VALID')
        self.parameter.append([kernel, bias])
    # The misspelt scope name is kept: it is part of the graph's op names.
    with tf.name_scope('first_maxpoll_layer'):
        maxpool1 = max_pool(conv1)

    with tf.name_scope('second_conv_layer'):
        kernel2 = make_weight([5, 5, 96, 256])
        bias2 = make_bias(256)
        conv2 = conv_relu(maxpool1, kernel2, bias2)
        self.parameter.append([kernel2, bias2])
    with tf.name_scope('second_maxpool_layer'):
        maxpool2 = max_pool(conv2)

    with tf.name_scope('third_conv_layer'):
        kernel3 = make_weight([3, 3, 256, 384])
        bias3 = make_bias(384)
        conv3 = conv_relu(maxpool2, kernel3, bias3)
        self.parameter.append([kernel3, bias3])
    with tf.name_scope('fourth_conv_layer'):
        kernel4 = make_weight([3, 3, 384, 384])
        bias4 = make_bias(384)
        conv4 = conv_relu(conv3, kernel4, bias4)
        self.parameter.append([kernel4, bias4])
    with tf.name_scope('fifth_conv_layer'):
        kernel5 = make_weight([3, 3, 384, 256])
        bias5 = make_bias(256)
        conv5 = conv_relu(conv4, kernel5, bias5)
        self.parameter.append([kernel5, bias5])

    # Flatten the last convolution output for the fully connected layers.
    dim_list = conv5.get_shape().as_list()[1:]
    shape_dim = np.prod(dim_list)
    reshaped = tf.reshape(conv5, [-1, shape_dim])

    # NOTE(review): dropout is built unconditionally, so it is also active
    # when the graph is used for evaluation or inference - confirm whether
    # that is intended.
    with tf.name_scope('first_fc_layer'):
        weight1 = make_weight([shape_dim, 4096], name='fc_weight')
        bias6 = make_bias(4096, name='fc_bias')
        fc1_out = tf.nn.relu(
            tf.nn.bias_add(tf.matmul(reshaped, weight1), bias6))
        drop1 = tf.nn.dropout(fc1_out, rate=0.5)
        self.parameter.append([weight1, bias6])
    with tf.name_scope('second_fc_layer'):
        weight2 = make_weight([4096, 4096], name='fc_weight')
        bias7 = make_bias(4096, name='fc_bias')
        fc2_out = tf.nn.relu(
            tf.nn.bias_add(tf.matmul(drop1, weight2), bias7))
        drop2 = tf.nn.dropout(fc2_out, rate=0.5)
        self.parameter.append([weight2, bias7])
    # The misspelt scope name is kept: it is part of the variable names.
    with tf.name_scope('thrid_fc_layer'):
        weight3 = make_weight([4096, 1000], name='fc_weight')
        bias8 = make_bias(1000, name='fc_bias')
        # Keep the raw scores: the cross-entropy op below applies its own
        # softmax, so it must not be fed self.out.
        logits = tf.nn.bias_add(tf.matmul(drop2, weight3), bias8)
        self.out = tf.nn.softmax(logits)
        self.parameter.append([weight3, bias8])

    with tf.name_scope('loss'):
        # The 'loss' collection is graph-wide: it holds every weight created
        # by make_weight, plus anything else the graph put under that key.
        regulation_loss = tf.add_n(
            [tf.nn.l2_loss(var) for var in tf.get_collection('loss')])
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=self.input_y, logits=logits)
        self.loss = tf.reduce_mean(cross_entropy) + regulation_loss * regular
        self.train_loss_op = tf.train.GradientDescentOptimizer(
            lr_rate).minimize(self.loss)
    with tf.name_scope('accuracy'):
        correct = tf.equal(tf.argmax(self.out, axis=1),
                           tf.argmax(self.input_y, axis=1))
        self.accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
#print(l_t_matrix) #print(max_val) #print(non_zero) # Build model. num_input = num_users num_hidden_1 = 10 num_hidden_2 = 5 X = tf.placeholder(tf.float64, [None, num_input]) weights = { 'encoder_h1': tf.Variable(tf.random_normal([num_input, num_hidden_1], dtype=tf.float64)), 'encoder_h2': tf.Variable( tf.random_normal([num_hidden_1, num_hidden_2], dtype=tf.float64)), 'decoder_h1': tf.Variable( tf.random_normal([num_hidden_2, num_hidden_1], dtype=tf.float64)), 'decoder_h2': tf.Variable(tf.random_normal([num_hidden_1, num_input], dtype=tf.float64)), } biases = { 'encoder_b1': tf.Variable(tf.random_normal([num_hidden_1], dtype=tf.float64)), 'encoder_b2': tf.Variable(tf.random_normal([num_hidden_2], dtype=tf.float64)),
def train(train_list, val_list, debug_mode=True):
    """Build the PRLNet training graph and run the training loop.

    Creates the ``./graph``, ``./model`` and ``./output`` folders, builds
    the input pipeline, network, losses and Adam optimizer, then trains for
    ``n_epochs`` epochs. Every ``save_epochs`` epochs a checkpoint and the
    loss/PSNR histories are written out.

    :param train_list: training data listing handed to ``input_producer``
    :param val_list: validation data listing; only read when
        ``debug_mode`` is True
    :param debug_mode: when True, a validation pipeline is built as well
        and, at every save point, validation predictions are saved and
        their mean PSNR is recorded
    :return: None
    """
    print('Running PRLNet -Training!')
    # create folders to save trained model and results
    graph_dir = './graph'
    checkpt_dir = './model'
    output_dir = './output'
    exists_or_mkdir(graph_dir, need_remove=True)
    exists_or_mkdir(output_dir)
    exists_or_mkdir(checkpt_dir)

    # --------------------------------- load data ---------------------------------
    # data fetched at range: [-1,1]
    input_imgs, target_imgs, num = input_producer(train_list,
                                                  in_channels,
                                                  batch_size,
                                                  need_shuffle=True)
    if debug_mode:
        input_val, target_val, num_val = input_producer(val_list,
                                                        in_channels,
                                                        batch_size,
                                                        need_shuffle=False)

    pred_content, pred_detail, pred_imgs = gen_PRLNet(input_imgs,
                                                      out_channels,
                                                      is_train=True,
                                                      reuse=False)
    if debug_mode:
        # Second call with reuse=True so validation shares the trained weights.
        _, _, pred_val = gen_PRLNet(input_val,
                                    out_channels,
                                    is_train=False,
                                    reuse=True)

    # --------------------------------- loss terms ---------------------------------
    with tf.name_scope('Loss'):
        target_224 = tf.image.resize_images(target_imgs,
                                            size=[224, 224],
                                            method=0,
                                            align_corners=False)
        predict_224 = tf.image.resize_images(pred_imgs,
                                             size=[224, 224],
                                             method=0,
                                             align_corners=False)
        vgg19_api = VGG19("vgg19.npy")
        # Images are in [-1, 1]; (x + 1) / 2 maps them to [0, 1] for VGG.
        vgg_map_targets = vgg19_api.build((target_224 + 1) / 2,
                                          is_rgb=(in_channels == 3))
        vgg_map_predict = vgg19_api.build((predict_224 + 1) / 2,
                                          is_rgb=(in_channels == 3))

        content_loss = tf.losses.mean_squared_error(target_imgs, pred_content)
        vgg_loss = 2e-6 * tf.losses.mean_squared_error(vgg_map_targets,
                                                       vgg_map_predict)
        l1_loss = tf.reduce_mean(tf.abs(target_imgs - pred_imgs))
        # Not part of loss_op; left in place so the graph built here is
        # unchanged.
        mse_loss = tf.losses.mean_squared_error(target_imgs, pred_imgs)

        loss_op = content_loss + 2 * vgg_loss + l1_loss

    # --------------------------------- solver definition ---------------------------------
    global_step = tf.Variable(0, name='global_step', trainable=False)
    iters_per_epoch = np.floor_divide(num, batch_size)
    lr_decay = tf.train.polynomial_decay(
        learning_rate=learning_rate,
        global_step=global_step,
        decay_steps=iters_per_epoch * n_epochs,
        end_learning_rate=learning_rate / 100.0,
        power=0.9)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.name_scope('optimizer'):
        # Run the collected update ops together with every training step.
        with tf.control_dependencies(update_ops):
            gen_vars = [
                var for var in tf.trainable_variables()
                if var.name.startswith("PRLNet")
            ]
            gen_optim = tf.train.AdamOptimizer(lr_decay, beta1)
            gen_grads_and_vars = gen_optim.compute_gradients(
                loss_op, var_list=gen_vars)
            train_op = gen_optim.apply_gradients(gen_grads_and_vars,
                                                 global_step=global_step)

    # --------------------------------- model training ---------------------------------
    # TensorBoard summaries are currently disabled:
    # if debug_mode:
    #     with tf.name_scope('summarise') as sum_scope:
    #         tf.summary.scalar('loss', loss_op)
    #         tf.summary.scalar('learning rate', lr_decay)
    #         tf.summary.image('predicts', pred_imgs, max_outputs=9)
    #         summary_op = tf.summary.merge_all()

    with tf.name_scope("parameter_count"):
        num_parameters = tf.reduce_sum(
            [tf.reduce_prod(tf.shape(v)) for v in tf.trainable_variables()])

    # set GPU resources
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    #config.gpu_options.per_process_gpu_memory_fraction = 0.45

    saver = tf.train.Saver(max_to_keep=1)
    loss_list = []
    psnr_list = []
    with tf.Session(config=config) as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        # The queue-runner threads must be stopped even if training fails or
        # is interrupted, otherwise they are left running.
        try:
            sess.run(tf.global_variables_initializer())
            print(">>------------>>> [Training_Num] =%d" % num)
            print(">>------------>>> [Parameter_Num] =%d" %
                  sess.run(num_parameters))
            # Disabled together with the summaries above:
            # if debug_mode:
            #     with tf.name_scope(sum_scope):
            #         summary_writer = tf.summary.FileWriter(graph_dir, graph=sess.graph)

            for epoch in range(0, n_epochs):
                start_time = time.time()
                epoch_loss, n_iters = 0, 0
                for step in range(0, num, batch_size):
                    _, loss = sess.run([train_op, loss_op])
                    epoch_loss += loss
                    n_iters += 1
                    # iteration information
                    if n_iters % display_steps == 0:
                        tm = datetime.datetime.now().strftime(
                            '%Y-%m-%d %H:%M:%S.%f')
                        print("%s >> [%d/%d] iter: %d  loss: %4.4f" %
                              (tm, epoch, n_epochs, n_iters, loss))
                        # Disabled together with the summaries above:
                        # if debug_mode:
                        #     summary_str = sess.run(summary_op)
                        #     summary_writer.add_summary(summary_str, step)

                # epoch information
                epoch_loss = epoch_loss / n_iters
                loss_list.append(epoch_loss)
                print(
                    "[*] ----- Epoch: %d/%d | Loss: %4.4f | Time-consumed: %4.3f -----"
                    % (epoch, n_epochs, epoch_loss,
                       (time.time() - start_time)))

                if (epoch + 1) % save_epochs == 0:
                    if debug_mode:
                        print("----- validating model ...")
                        mean_psnr, nn = 0, 0
                        for idx in range(0, num_val, batch_size):
                            predicts, groundtruths = sess.run(
                                [pred_val, target_val])
                            save_images_from_batch(predicts, output_dir, idx)
                            psnr = measure_psnr(predicts, groundtruths)
                            mean_psnr += psnr
                            nn += 1
                        if nn > 0:
                            psnr_list.append(mean_psnr / nn)
                            print("----- psnr:%4.4f" % (mean_psnr / nn))
                        else:
                            # An empty validation set would divide by zero.
                            print("----- psnr: skipped (no validation data)")

                    print("----- saving model  ...")
                    saver.save(sess,
                               os.path.join(checkpt_dir, "model.cpkt"),
                               global_step=global_step)
                    save_list(os.path.join(output_dir, "loss"), loss_list)
                    save_list(os.path.join(output_dir, "psnr"), psnr_list)
        finally:
            # stop data queue
            coord.request_stop()
            coord.join(threads)

    # write out the loss list
    save_list(os.path.join(output_dir, "loss"), loss_list)
    save_list(os.path.join(output_dir, "psnr"), psnr_list)
    print("Training finished!")
    return None
def initialise_model(self, numpy_embedding):
    """
    Build the TensorFlow graph of the Attract-Repel model.

    Creates the placeholders for the example pairs, their negative examples,
    the two margins and the regularisation constant; the constant ``W_init``
    and the trainable ``W_dynamic`` (both initialised from
    ``numpy_embedding``); the attract and repel costs; and one Adagrad
    update step for each cost.

    Returns the L2-normalised vectors looked up for the left and right
    attract examples and the left and right repel examples, in that order.
    """

    def unit_rows(table, pairs, column):
        # L2-normalised rows of `table`, selected by one column of index pairs.
        return tf.nn.l2_normalize(
            tf.nn.embedding_lookup(table, pairs[:, column]), 1)

    def rowwise_dot(lhs, rhs):
        # Dot product of matching rows.
        return tf.reduce_sum(tf.multiply(lhs, rhs), 1)

    def clipped_gradients(cost, variables):
        # Gradients of `cost`, each clipped element-wise to [-2, 2].
        return [
            tf.clip_by_value(gradient, -2., 2.)
            for gradient in tf.gradients(cost, variables)
        ]

    # Each row of these placeholders holds the positions of two word vectors.
    self.attract_examples = tf.placeholder(tf.int32, [None, 2])
    self.repel_examples = tf.placeholder(tf.int32, [None, 2])
    self.negative_examples_attract = tf.placeholder(tf.int32, [None, 2])
    self.negative_examples_repel = tf.placeholder(tf.int32, [None, 2])

    self.attract_margin = tf.placeholder("float")
    self.repel_margin = tf.placeholder("float")
    self.regularisation_constant = tf.placeholder("float")

    # W_init keeps the starting vectors for the regularisation term;
    # W_dynamic holds the vectors that training updates.
    self.W_init = tf.constant(numpy_embedding, name="W_init")
    self.W_dynamic = tf.Variable(numpy_embedding, name="W_dynamic")

    # ---- Attract cost ----
    attract_left = unit_rows(self.W_dynamic, self.attract_examples, 0)
    attract_right = unit_rows(self.W_dynamic, self.attract_examples, 1)
    attract_negative_left = unit_rows(self.W_dynamic,
                                      self.negative_examples_attract, 0)
    attract_negative_right = unit_rows(self.W_dynamic,
                                       self.negative_examples_attract, 1)

    attract_pair_similarity = rowwise_dot(attract_left, attract_right)
    attract_negative_similarity_left = rowwise_dot(attract_left,
                                                   attract_negative_left)
    attract_negative_similarity_right = rowwise_dot(attract_right,
                                                    attract_negative_right)

    # Hinge terms (before regularisation): each word of a pair should be
    # closer to its partner than to its negative example, by the margin.
    attract_hinge_left = tf.nn.relu(self.attract_margin +
                                    attract_negative_similarity_left -
                                    attract_pair_similarity)
    attract_hinge_right = tf.nn.relu(self.attract_margin +
                                     attract_negative_similarity_right -
                                     attract_pair_similarity)
    self.attract_cost = attract_hinge_left + attract_hinge_right

    # ---- Repel cost ----
    repel_left = unit_rows(self.W_dynamic, self.repel_examples, 0)
    repel_right = unit_rows(self.W_dynamic, self.repel_examples, 1)
    repel_negative_left = unit_rows(self.W_dynamic,
                                    self.negative_examples_repel, 0)
    repel_negative_right = unit_rows(self.W_dynamic,
                                     self.negative_examples_repel, 1)

    repel_pair_similarity = rowwise_dot(repel_left, repel_right)
    repel_negative_similarity_left = rowwise_dot(repel_left,
                                                 repel_negative_left)
    repel_negative_similarity_right = rowwise_dot(repel_right,
                                                  repel_negative_right)

    # Mirror image of the attract hinge: the signs are flipped.
    repel_hinge_left = tf.nn.relu(self.repel_margin -
                                  repel_negative_similarity_left +
                                  repel_pair_similarity)
    repel_hinge_right = tf.nn.relu(self.repel_margin -
                                   repel_negative_similarity_right +
                                   repel_pair_similarity)
    self.repel_cost = repel_hinge_left + repel_hinge_right

    # ---- Regularisation (one term per cost) ----
    # Starting vectors of the same example pairs, looked up without
    # normalisation.
    initial_attract_left = tf.nn.embedding_lookup(
        self.W_init, self.attract_examples[:, 0])
    initial_attract_right = tf.nn.embedding_lookup(
        self.W_init, self.attract_examples[:, 1])
    initial_repel_left = tf.nn.embedding_lookup(
        self.W_init, self.repel_examples[:, 0])
    initial_repel_right = tf.nn.embedding_lookup(
        self.W_init, self.repel_examples[:, 1])

    attract_drift = (tf.nn.l2_loss(initial_attract_left - attract_left) +
                     tf.nn.l2_loss(initial_attract_right - attract_right))
    self.attract_cost = (self.attract_cost +
                         self.regularisation_constant * attract_drift)

    repel_drift = (tf.nn.l2_loss(initial_repel_left - repel_left) +
                   tf.nn.l2_loss(initial_repel_right - repel_right))
    self.repel_cost = (self.repel_cost +
                       self.regularisation_constant * repel_drift)

    # ---- Training steps ----
    trainable = tf.trainable_variables()
    attract_gradients = clipped_gradients(self.attract_cost, trainable)
    repel_gradients = clipped_gradients(self.repel_cost, trainable)

    attract_optimiser = tf.train.AdagradOptimizer(0.05)
    repel_optimiser = tf.train.AdagradOptimizer(0.05)

    self.attract_cost_step = attract_optimiser.apply_gradients(
        list(zip(attract_gradients, trainable)))
    self.repel_cost_step = repel_optimiser.apply_gradients(
        list(zip(repel_gradients, trainable)))

    # Handles for reading the vectors back out of the TensorFlow embeddings.
    return attract_left, attract_right, repel_left, repel_right
def __init__(self,
             num_classes,
             placeholders,
             features,
             adj,
             degrees,
             layer_infos,
             concat=True,
             aggregator_type="mean",
             model_size="small",
             sigmoid_loss=False,
             identity_dim=0,
             **kwargs):
    '''
    Args:
        - num_classes: Number of output classes (stored on the instance).
        - placeholders: Standard TensorFlow placeholder object; the "batch"
                and "batch_size" entries are read here.
        - features: Numpy array with node features, or None to use only the
                learned identity embeddings.
        - adj: Numpy array with adjacency lists (padded with random re-samples)
        - degrees: Numpy array with node degrees.
        - layer_infos: List of SAGEInfo namedtuples that describe the parameters
                of all the recursive layers. See SAGEInfo definition above.
        - concat: whether to concatenate during recursive iterations
        - aggregator_type: how to aggregate neighbor information; one of
                "mean", "seq", "meanpool", "maxpool", "gcn"
        - model_size: one of "small" and "big"
        - sigmoid_loss: Set to true if nodes can belong to multiple classes
        - identity_dim: Width of the per-node embedding that is learned and
                concatenated to the features; 0 disables it.
        - kwargs: Forwarded to models.GeneralizedModel.__init__.

    Raises:
        Exception: if aggregator_type is not recognised, or if features is
            None while identity_dim is 0.
    '''
    models.GeneralizedModel.__init__(self, **kwargs)

    aggregators = {
        "mean": MeanAggregator,
        "seq": SeqAggregator,
        "meanpool": MeanPoolingAggregator,
        "maxpool": MaxPoolingAggregator,
        "gcn": GCNAggregator,
    }
    try:
        self.aggregator_cls = aggregators[aggregator_type]
    except (KeyError, TypeError):
        # Report the requested type: self.aggregator_cls is not set on this
        # path, so reading it here would raise AttributeError instead.
        raise Exception("Unknown aggregator: ", aggregator_type)

    # get info from placeholders...
    self.inputs1 = placeholders["batch"]
    self.model_size = model_size
    self.adj_info = adj
    if identity_dim > 0:
        # One trainable identity vector per row of the adjacency table.
        self.embeds = tf.get_variable(
            "node_embeddings",
            [adj.get_shape().as_list()[0], identity_dim])
    else:
        self.embeds = None
    if features is None:
        if identity_dim == 0:
            raise Exception(
                "Must have a positive value for identity feature dimension if no input features given."
            )
        self.features = self.embeds
    else:
        # Input features are stored in the graph but never trained.
        self.features = tf.Variable(tf.constant(features, dtype=tf.float32),
                                    trainable=False)
        if self.embeds is not None:
            self.features = tf.concat([self.embeds, self.features], axis=1)
    self.degrees = degrees
    self.concat = concat
    self.num_classes = num_classes
    self.sigmoid_loss = sigmoid_loss
    # Layer widths: input width first, then each layer's output width.
    self.dims = [
        (0 if features is None else features.shape[1]) + identity_dim
    ]
    self.dims.extend(
        [layer_infos[i].output_dim for i in range(len(layer_infos))])
    self.batch_size = placeholders["batch_size"]
    self.placeholders = placeholders
    self.layer_infos = layer_infos

    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=FLAGS.learning_rate)

    self.build()
def weight_variable(shape, stddev):
    """
    Create a variable initialised from a truncated normal distribution.
    :param shape: shape of the variable to create
    :param stddev: standard deviation handed to tf.truncated_normal
    :return: the new tf.Variable
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=stddev))
def batch_norm_wrapper(self,
                       inputs,
                       batch_name,
                       is_training=True,
                       epsilon=1e-05,
                       decay=0.9):
    """
    Batch normalisation layer that works for both training and inference

    Parameters
    ----------
    inputs: TensorFlow Tensor
        4d tensor of NHWC format
    batch_name: string
        Prefix for the names of the layer's variables
    is_training: bool
        True to normalise with the statistics of the current batch (and
        update the running averages); False to normalise with the running
        averages
    epsilon: float
        Small, non-zero value added to variance to avoid divide-by-zero error
    decay: float
        Weight of the old value in the running averages

    Returns
    -------
    return: TensorFlow Tensor
        Result of batch norm layer
    """
    channels = inputs.get_shape()[-1]

    # Running statistics: updated while training, read at inference time.
    running_mean = _tf.Variable(
        _tf.zeros(shape=[channels], dtype="float32"),
        name=batch_name + "running_mean",
        trainable=False,
    )
    running_var = _tf.Variable(
        _tf.ones(shape=[channels], dtype="float32"),
        name=batch_name + "running_var",
        trainable=False,
    )

    # Statistics of the current batch, reduced over every axis but the last.
    reduce_axes = list(range(len(inputs.get_shape()) - 1))
    batch_mean, batch_var = _tf.nn.moments(inputs, reduce_axes, name="moments")

    def use_batch_statistics():
        blended_mean = (_tf.multiply(running_mean, decay) +
                        _tf.multiply(batch_mean, 1.0 - decay))
        update_mean = _tf.assign(running_mean, blended_mean)
        blended_var = (_tf.multiply(running_var, decay) +
                       _tf.multiply(batch_var, 1.0 - decay))
        update_var = _tf.assign(running_var, blended_var)
        # The identities force both running averages to be updated whenever
        # the batch statistics are used.
        with _tf.control_dependencies([update_mean, update_var]):
            return _tf.identity(batch_mean), _tf.identity(batch_var)

    def use_running_statistics():
        return _tf.identity(running_mean), _tf.identity(running_var)

    mean, variance = _tf.cond(
        _tf.cast(is_training, _tf.bool),
        use_batch_statistics,
        use_running_statistics,
    )

    # Learned offset (beta) and scale (gamma).
    beta = _tf.Variable(
        _tf.zeros(shape=channels, dtype="float32"),
        name=batch_name + "beta",
        trainable=True,
    )
    gamma = _tf.Variable(
        _tf.ones(shape=channels, dtype="float32"),
        name=batch_name + "gamma",
        trainable=True,
    )
    return _tf.nn.batch_normalization(inputs, mean, variance, beta, gamma,
                                      epsilon)