def add_critic_network_op(self, scope="critic"):
    state_tensor = self.observation_placeholder
    if self.use_cnn:
        state_tensor = build_cnn(state_tensor, scope)
    elif self.use_small_cnn:
        state_tensor = build_small_cnn(state_tensor, scope)
    self.baseline = tf.squeeze(
        build_mlp(state_tensor, 1, scope, self.n_layers, self.layer_size),
        axis=1)
    self.baseline_target_placeholder = tf.placeholder(tf.float32, shape=[None])

    global_step = tf.train.get_or_create_global_step()
    print("[critic]num_env_frames", global_step)
    learning_rate = tf.train.polynomial_decay(
        self.lr_critic, global_step, self.config.number_of_iterations, 0)
    # learning_rate = tf.train.exponential_decay(self.lr_critic,
    #                                            self.config.number_of_iterations,
    #                                            1000, 0.96, staircase=False)
    tf.summary.scalar("lr/critic", learning_rate)

    self.critic_loss = tf.losses.mean_squared_error(
        self.baseline, self.baseline_target_placeholder, scope=scope)
    # tf.summary.scalar("loss/actor", critic_loss)
    # self.update_critic_op = tf.train.AdamOptimizer(
    #     learning_rate=learning_rate).minimize(self.critic_loss)
    self.update_critic_op = tf.train.RMSPropOptimizer(
        learning_rate=learning_rate, momentum=0,
        epsilon=0.01).minimize(self.critic_loss)
def exportPbFile():
    sess = tf.Session()
    graph = build_cnn(Config(5, 5))
    tf.train.write_graph(sess.graph_def, './', 'graph.pb')
    saver = tf.train.Saver()
    print(saver.saver_def.restore_op_name)
    print(saver.saver_def.filename_tensor_name)
    print(graph["predicted_val_top_k"].name)
    print(graph["predicted_index_top_k"].name)
    print(graph["images"])
    print(graph["keep_prob"])
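# A minimal sketch (an assumption, not part of the original code) of reading the
# exported graph.pb back in with the TF1 API. tf.train.write_graph writes a
# text-format GraphDef by default, so it is parsed with protobuf text_format;
# tensor names for get_tensor_by_name would come from the names printed by
# exportPbFile(). Note that graph.pb holds only the graph structure, not the
# variable values, which live in the Saver checkpoint.
from google.protobuf import text_format
import tensorflow as tf

def load_exported_graph(path='./graph.pb'):
    graph_def = tf.GraphDef()
    with tf.gfile.GFile(path, 'r') as f:
        text_format.Merge(f.read(), graph_def)
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def, name='')
    return graph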
def train_pipeline(X_train, y_train, X_valid, y_valid, epochs=200,
                   batch_size=32, patience=10,
                   model_save_loc="model_simple.hdf5", monitor="val_loss",
                   mode="min", factor=0.1, lr_patience=5, min_lr=0.00001):
    # Models are not used standalone in the final solution. In an ensemble it
    # can be better to use a model with a low loss, or one simply trained
    # differently, for the sake of variety.
    # monitor = "val_loss"
    # mode = "min"
    early_stop = EarlyStopping(patience=patience, verbose=1, monitor=monitor,
                               mode=mode)
    check_point = ModelCheckpoint(model_save_loc, save_best_only=True,
                                  verbose=1, monitor=monitor, mode=mode)
    reduce_lr = ReduceLROnPlateau(factor=factor, patience=lr_patience,
                                  min_lr=min_lr, verbose=1, monitor=monitor,
                                  mode=mode)

    model = build_cnn()
    print(model.summary())
    model.fit(X_train, y_train, epochs=epochs,
              validation_data=(X_valid, y_valid),
              callbacks=[early_stop, check_point, reduce_lr],
              batch_size=batch_size)
    return model, model.evaluate(X_valid, y_valid, batch_size=batch_size)
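# Hypothetical usage (an assumption, not from the original repo); the array
# shapes must match whatever build_cnn() expects:
#
#     model, val_metrics = train_pipeline(X_train, y_train, X_valid, y_valid,
#                                         epochs=50, batch_size=64)
#
# Note that ModelCheckpoint(save_best_only=True) keeps the best weights on disk
# at model_save_loc, while the returned model object holds the last-epoch
# weights, which are not necessarily the best ones.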
def add_actor_network_op(self, idx, scope="actor"):
    state_tensor = self.observation_placeholder
    if self.heterogeneity_cnn:
        print(self.cnn_big_little_config[self.cnn_big_little_map[idx]][0])
        state_tensor = build_configurable_cnn(
            state_tensor,
            self.cnn_big_little_config[self.cnn_big_little_map[idx]][0],
            self.cnn_big_little_config[self.cnn_big_little_map[idx]][1],
            scope)
    elif self.use_cnn:
        state_tensor = build_cnn(state_tensor, scope)
    elif self.use_small_cnn:
        state_tensor = build_small_cnn(state_tensor, scope)
    print("state_tensor.get_shape()")
    print(state_tensor.get_shape())

    if self.heterogeneity:
        action_logits = build_mlp(
            state_tensor, self.action_dim, scope,
            self.mlp_big_little_config[self.mlp_big_little_map[idx]][0],
            self.mlp_big_little_config[self.mlp_big_little_map[idx]][1])
    else:
        dropout = self.dropout
        print("Dropout rate", dropout)
        action_logits = build_mlp(state_tensor, self.action_dim, scope,
                                  self.n_layers, self.layer_size,
                                  dropout=dropout)
    print("action_logits.get_shape()")
    print(action_logits.get_shape())

    policy_entropy = -tf.reduce_sum(
        tf.nn.softmax(action_logits) * tf.nn.log_softmax(action_logits), -1)
    print(policy_entropy.get_shape())
    policy_entropy = tf.reduce_sum(policy_entropy)

    # self.sampled_action = tf.squeeze(tf.multinomial(action_logits, 1), axis=1)
    # Add one more sampled action
    self.sampled_action_list.append(
        tf.squeeze(tf.multinomial(action_logits, 1), axis=1))

    logprob = -1 * tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=self.action_placeholder, logits=action_logits)
    actor_loss = -tf.reduce_mean(logprob * self.advantage_placeholder)
    actor_loss = actor_loss - policy_entropy * 0.0001

    global_step = tf.train.get_or_create_global_step()
    if idx == 0:
        tf.summary.scalar("debug/global_step", global_step)
    print("[actor]num_env_frames", global_step)
    if idx == 0:
        # only the first actor updates lr
        learning_rate = tf.train.polynomial_decay(
            self.lr_actor, global_step, self.config.number_of_iterations, 0)
        self.lr_actor_op = learning_rate
    else:
        learning_rate = self.lr_actor_op
    # learning_rate = tf.train.exponential_decay(self.config.lr_actor,
    #                                            self.config.number_of_iterations,
    #                                            1000, 0.96, staircase=False)
    if idx == 0:
        tf.summary.scalar("lr/actor", learning_rate)

    # self.update_actor_op = tf.train.AdamOptimizer(
    #     learning_rate=learning_rate).minimize(self.actor_loss)
    # self.update_actor_op = tf.train.GradientDescentOptimizer(
    #     learning_rate=learning_rate).minimize(self.actor_loss)
    # self.update_actor_op = tf.train.RMSPropOptimizer(
    #     learning_rate=learning_rate, momentum=0,
    #     epsilon=0.01).minimize(self.actor_loss, global_step=global_step)
    if idx != 0:
        global_step = None
    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate,
                                          momentum=0, epsilon=0.01)
    grad_var_pairs = optimizer.compute_gradients(actor_loss)
    vars = [x[1] for x in grad_var_pairs]
    grads = [x[0] for x in grad_var_pairs]
    print("actor", idx)
    print(vars)
    print(grads)
    self.record_grads(idx, vars, grads, "")
    clipped, _ = tf.clip_by_global_norm(grads, 40)
    self.record_grads(idx, vars, clipped, "_clipped")
    # tf.summary.histogram("grad/actor_clipped_{}".format(idx), clipped)
    train_op = optimizer.apply_gradients(zip(clipped, vars),
                                         global_step=global_step)
    self.update_actor_ops.append(train_op)

    # add variables for summary
    self.actor_loss_list.append(actor_loss)
    self.policy_entropy_list.append(policy_entropy)
    # add reset op
    self.reset_actor_ops.append(
        tf.initializers.variables(tf.trainable_variables(scope=scope)))
def train(images, labels, fold, model_type, batch_size, num_epochs, subj_id=0,
          reuse_cnn=False, dropout_rate=dropout_rate,
          learning_rate_default=1e-3, Optimizer=tf.train.AdamOptimizer,
          log_path=log_path):
    """
    A sample training function which loops over the training set and evaluates
    the network on the validation set after each epoch. Evaluates the network
    on the whole training set once training finishes.

    :param images: input images
    :param labels: target labels
    :param fold: tuple of (train, test) index numbers
    :param model_type: model type ('cnn', '1dconv', 'lstm', 'mix')
    :param batch_size: batch size for training
    :param num_epochs: number of epochs of dataset to go over for training
    :param subj_id: the id of fold for storing log and the best model
    :param reuse_cnn: whether to train cnn first, and load its weight for multi-frame model
    :return: none
    """
    with tf.name_scope('Inputs'):
        input_var = tf.placeholder(tf.float32, [None, None, 32, 32, n_colors],
                                   name='X_inputs')
        target_var = tf.placeholder(tf.int64, [None], name='y_inputs')
        tf_is_training = tf.placeholder(tf.bool, None, name='is_training')

    num_classes = len(np.unique(labels))
    (X_train, y_train), (X_val, y_val), (X_test, y_test) = reformatInput(
        images, labels, fold)

    print('Train set label and proportion:\t',
          np.unique(y_train, return_counts=True))
    print('Val set label and proportion:\t',
          np.unique(y_val, return_counts=True))
    print('Test set label and proportion:\t',
          np.unique(y_test, return_counts=True))
    print('The shape of X_train:\t', X_train.shape)
    print('The shape of X_val:\t', X_val.shape)
    print('The shape of X_test:\t', X_test.shape)

    print("Building model and compiling functions...")
    if model_type == '1dconv':
        network = build_convpool_conv1d(input_var, num_classes,
                                        train=tf_is_training,
                                        dropout_rate=dropout_rate,
                                        name='CNN_Conv1d' + '_sbj' + str(subj_id))
    elif model_type == 'lstm':
        network = build_convpool_lstm(input_var, num_classes, 100,
                                      train=tf_is_training,
                                      dropout_rate=dropout_rate,
                                      name='CNN_LSTM' + '_sbj' + str(subj_id))
    elif model_type == 'mix':
        network = build_convpool_mix(input_var, num_classes, 100,
                                     train=tf_is_training,
                                     dropout_rate=dropout_rate,
                                     name='CNN_Mix' + '_sbj' + str(subj_id))
    elif model_type == 'cnn':
        with tf.name_scope(name='CNN_layer' + '_fold' + str(subj_id)):
            network = build_cnn(input_var)  # output shape [None, 4, 4, 128]
            convpool_flat = tf.reshape(network, [-1, 4 * 4 * 128])
            h_fc1_drop1 = tf.layers.dropout(convpool_flat, rate=dropout_rate,
                                            training=tf_is_training,
                                            name='dropout_1')
            h_fc1 = tf.layers.dense(h_fc1_drop1, 256, activation=tf.nn.relu,
                                    name='fc_relu_256')
            h_fc1_drop2 = tf.layers.dropout(h_fc1, rate=dropout_rate,
                                            training=tf_is_training,
                                            name='dropout_2')
            network = tf.layers.dense(h_fc1_drop2, num_classes,
                                      name='fc_softmax')
            # the loss function contains the softmax activation
    else:
        raise ValueError(
            "Model not supported ['1dconv', 'maxpool', 'lstm', 'mix', 'cnn']")

    Train_vars = tf.trainable_variables()
    prediction = network

    with tf.name_scope('Loss'):
        l2_loss = tf.add_n(
            [tf.nn.l2_loss(v) for v in Train_vars if 'kernel' in v.name])
        ce_loss = tf.losses.sparse_softmax_cross_entropy(labels=target_var,
                                                         logits=prediction)
        _loss = ce_loss + weight_decay * l2_loss

    # decay_steps for learning rate decay
    decay_steps = 3 * (len(y_train) // batch_size
                       )  # len(X_train)//batch_size is the number of training steps per epoch

    with tf.name_scope('Optimizer'):
        # learning_rate = learning_rate_default * Decay_rate^(global_steps/decay_steps)
        global_steps = tf.Variable(0, name="global_step", trainable=False)
        learning_rate = tf.train.exponential_decay(  # learning rate decay
            learning_rate_default,  # Base learning rate.
            global_steps,
            decay_steps,
            0.95,  # Decay rate.
            staircase=True)
        optimizer = Optimizer(
            learning_rate)  # GradientDescentOptimizer / AdamOptimizer
        train_op = optimizer.minimize(_loss, global_step=global_steps,
                                      var_list=Train_vars)

    with tf.name_scope('Accuracy'):
        prediction = tf.argmax(prediction, axis=1)
        correct_prediction = tf.equal(prediction, target_var)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Output directory for models and summaries;
    # choose a different path for each model and subject
    out_dir = os.path.abspath(
        os.path.join(os.path.curdir, log_path,
                     (model_type + '_' + str(subj_id))))
    print("Writing to {}\n".format(out_dir))

    # Summaries for loss, accuracy and learning_rate
    loss_summary = tf.summary.scalar('loss', _loss)
    acc_summary = tf.summary.scalar('train_acc', accuracy)
    lr_summary = tf.summary.scalar('learning_rate', learning_rate)

    # Train summaries
    train_summary_op = tf.summary.merge(
        [loss_summary, acc_summary, lr_summary])
    train_summary_dir = os.path.join(out_dir, "summaries", "train")
    train_summary_writer = tf.summary.FileWriter(train_summary_dir,
                                                 tf.get_default_graph())

    # Dev summaries
    dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
    dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
    dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                               tf.get_default_graph())

    # Test summaries
    test_summary_op = tf.summary.merge([loss_summary, acc_summary])
    test_summary_dir = os.path.join(out_dir, "summaries", "test")
    test_summary_writer = tf.summary.FileWriter(test_summary_dir,
                                                tf.get_default_graph())

    # Checkpoint directory. TensorFlow assumes this directory already exists,
    # so we need to create it.
    checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
    checkpoint_prefix = os.path.join(checkpoint_dir, model_type)
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    if model_type != 'cnn' and reuse_cnn:
        # saver for reusing the CNN weights
        reuse_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                       scope='VGG_NET_CNN')
        original_saver = tf.train.Saver(
            reuse_vars)  # Pass the variables as a list

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

    print("Starting training...")
    total_start_time = time.time()
    best_validation_accu = 0

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    with tf.Session() as sess:
        sess.run(init_op)
        if model_type != 'cnn' and reuse_cnn:
            cnn_model_path = os.path.abspath(
                os.path.join(os.path.curdir, log_path,
                             ('cnn_' + str(subj_id)), 'checkpoints'))
            cnn_model_path = tf.train.latest_checkpoint(cnn_model_path)
            print('-' * 20)
            print('Load cnn model weight for multi-frame model from {}'.format(
                cnn_model_path))
            original_saver.restore(sess, cnn_model_path)

        stop_count = 0  # count for early stopping
        for epoch in range(num_epochs):
            print('-' * 50)
            # Train set
            train_err = train_acc = train_batches = 0
            start_time = time.time()
            for batch in iterate_minibatches(X_train, y_train, batch_size,
                                             shuffle=False):
                inputs, targets = batch
                summary, _, pred, loss, acc = sess.run(
                    [train_summary_op, train_op, prediction, _loss, accuracy],
                    {
                        input_var: inputs,
                        target_var: targets,
                        tf_is_training: True
                    })
                train_acc += acc
                train_err += loss
                train_batches += 1
            train_summary_writer.add_summary(summary, sess.run(global_steps))

            av_train_err = train_err / train_batches
            av_train_acc = train_acc / train_batches

            # Val set
            summary, pred, av_val_err, av_val_acc = sess.run(
                [dev_summary_op, prediction, _loss, accuracy], {
                    input_var: X_val,
                    target_var: y_val,
                    tf_is_training: False
                })
            dev_summary_writer.add_summary(summary, sess.run(global_steps))

            print("Epoch {} of {} took {:.3f}s".format(
                epoch + 1, num_epochs, time.time() - start_time))

            fmt_str = "Train \tEpoch [{:d}/{:d}] train_Loss: {:.4f}\ttrain_Acc: {:.2f}"
            print_str = fmt_str.format(epoch + 1, num_epochs, av_train_err,
                                       av_train_acc * 100)
            print(print_str)

            fmt_str = "Val \tEpoch [{:d}/{:d}] val_Loss: {:.4f}\tval_Acc: {:.2f}"
            print_str = fmt_str.format(epoch + 1, num_epochs, av_val_err,
                                       av_val_acc * 100)
            print(print_str)

            # Test set
            summary, pred, av_test_err, av_test_acc = sess.run(
                [test_summary_op, prediction, _loss, accuracy], {
                    input_var: X_test,
                    target_var: y_test,
                    tf_is_training: False
                })
            test_summary_writer.add_summary(summary, sess.run(global_steps))

            fmt_str = "Test \tEpoch [{:d}/{:d}] test_Loss: {:.4f}\ttest_Acc: {:.2f}"
            print_str = fmt_str.format(epoch + 1, num_epochs, av_test_err,
                                       av_test_acc * 100)
            print(print_str)

            if av_val_acc > best_validation_accu:  # early stopping
                stop_count = 0
                early_stopping_epoch = epoch
                best_validation_accu = av_val_acc
                test_acc_val = av_test_acc
                saver.save(sess, checkpoint_prefix,
                           global_step=sess.run(global_steps))
            else:
                stop_count += 1
                if stop_count >= 10:
                    # stop training if val_acc does not improve for over 10 epochs
                    break

        # evaluate on the whole training set after training finishes
        train_batches = train_acc = 0
        for batch in iterate_minibatches(X_train, y_train, batch_size,
                                         shuffle=False):
            inputs, targets = batch
            acc = sess.run(accuracy, {
                input_var: inputs,
                target_var: targets,
                tf_is_training: False
            })
            train_acc += acc
            train_batches += 1

        last_train_acc = train_acc / train_batches
        last_val_acc = av_val_acc
        last_test_acc = av_test_acc

        print('-' * 50)
        print('Time in total:', time.time() - total_start_time)
        print("Best validation accuracy:\t\t{:.2f} %".format(
            best_validation_accu * 100))
        print("Test accuracy at the best validation accuracy:\t\t{:.2f} %".format(
            test_acc_val * 100))
        print('-' * 50)
        print("Last train accuracy:\t\t{:.2f} %".format(last_train_acc * 100))
        print("Last validation accuracy:\t\t{:.2f} %".format(last_val_acc * 100))
        print("Last test accuracy:\t\t\t\t{:.2f} %".format(last_test_acc * 100))
        print('Early Stopping at epoch: {}'.format(early_stopping_epoch + 1))

    train_summary_writer.close()
    dev_summary_writer.close()
    test_summary_writer.close()

    return [
        last_train_acc, best_validation_accu, test_acc_val, last_val_acc,
        last_test_acc
    ]
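# Hypothetical usage sketch (not from the original code): fold_pairs, images,
# labels, and the module-level globals (n_colors, weight_decay, dropout_rate,
# log_path) are assumed to be defined elsewhere. Resetting the default graph
# between calls avoids piling up duplicate ops, since train() builds a fresh
# graph each time it is called.
#
#     results = []
#     for subj_id, fold in enumerate(fold_pairs):
#         tf.reset_default_graph()
#         results.append(train(images, labels, fold, model_type='cnn',
#                              batch_size=32, num_epochs=60, subj_id=subj_id))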
# train all model variations
for trainable in trainables:
    for hidden_layer in hidden_layers:
        for dropout_r in dropout_rs:
            for hidden_size in hidden_sizes:
                for regularize in regularizes:
                    if not os.path.exists('weights'):
                        os.makedirs('weights')
                    if not os.path.exists('logs'):
                        os.makedirs('logs')
                    if not os.path.exists('plots'):
                        os.makedirs('plots')

                    # build and train model
                    model = build_cnn(config, trainable, hidden_layer,
                                      dropout_r, hidden_size, regularize)
                    history, weights_path = train_model(config, model)
                    train_acc, val_acc = history.history['acc'], history.history['val_acc']
                    train_loss, val_loss = history.history['loss'], history.history['val_loss']
                    plot_loss_acc(config['max_epoch'], train_acc, val_acc,
                                  train_loss, val_loss, model.name)

                    # evaluate model on test set
                    test_acc_best, test_loss_best = eval_models(weights_path,
                                                                test_x, test_y)

                    # save all metrics in csv
                    train_acc_best = max(train_acc)
                    val_acc_best = max(val_acc)
                    train_loss_best = min(train_loss)
                    val_loss_best = min(val_loss)
                    # write to csv
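# A possible implementation of the "write to csv" step above (a sketch, not the
# original code): the file name and column layout are assumptions.
import csv
import os

def append_metrics_row(csv_path, row, header):
    """Append one row of metrics, writing the header only for a new file."""
    write_header = not os.path.exists(csv_path)
    with open(csv_path, 'a', newline='') as f:
        writer = csv.writer(f)
        if write_header:
            writer.writerow(header)
        writer.writerow(row)

# e.g. inside the innermost loop:
# append_metrics_row(
#     'logs/metrics.csv',
#     [trainable, hidden_layer, dropout_r, hidden_size, regularize,
#      train_acc_best, val_acc_best, test_acc_best,
#      train_loss_best, val_loss_best, test_loss_best],
#     ['trainable', 'hidden_layer', 'dropout_r', 'hidden_size', 'regularize',
#      'train_acc', 'val_acc', 'test_acc', 'train_loss', 'val_loss', 'test_loss'])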
def main():
    parser = ArgumentParser('CNN_Node',
                            formatter_class=ArgumentDefaultsHelpFormatter,
                            conflict_handler='resolve')
    parser.add_argument('--input', required=True, help='Input graph file')
    parser.add_argument('--label', required=True, help='Input label file')
    parser.add_argument('--output', required=True, help='Output embedding file')
    parser.add_argument('--temp', required=True,
                        help='Temporary file for saving data')
    parser.add_argument('--neighbor', default=200, type=int,
                        help='Number of neighbors used to construct features')
    parser.add_argument('--negative', default=5, type=int,
                        help='Number of negative sampling edges')
    parser.add_argument('--iteration', default=10, type=int,
                        help='Number of iterations')
    parser.add_argument('--alpha', default=0.1, type=float,
                        help='Learning rate for SGD')
    parser.add_argument('--num_kernel', nargs='+', type=int,
                        help='Number of channels in the convolutional layers')
    parser.add_argument('--kernel_size', nargs='+', type=int,
                        help='Kernel size in the convolutional layers')
    parser.add_argument('--dimension', default=100, type=int,
                        help='Dimension of the output embedding')
    args = parser.parse_args()

    os.chdir(os.path.dirname(os.path.realpath(__file__)))
    label_matrix_file = 'temp_label.txt'

    start_time_1 = timeit.default_timer()
    # features, edges, neg_edges, node2id = read_graph(args.input, args.neighbor, args.negative)
    features, edges, neg_edges, node2id = node_selection(
        args.input, 2, args.neighbor, args.negative)
    # save_data(features, edges, neg_edges, node2id, args.temp)
    # features, edges, neg_edges, node2id = load_data(args.temp)
    end_time_1 = timeit.default_timer()
    print('Run for constructing %.2fs' % (end_time_1 - start_time_1))

    start_time_2 = timeit.default_timer()
    embeddings = build_cnn(X=features, edges=edges, neg_edges=neg_edges,
                           alpha=args.alpha, n_epochs=args.iteration,
                           nkerns=args.num_kernel,
                           kerns_size=args.kernel_size,
                           dimension=args.dimension)
    end_time_2 = timeit.default_timer()
    print('Run for training %.2fs' % (end_time_2 - start_time_2))

    start_time_3 = timeit.default_timer()
    write_file(embeddings, node2id, args.label, args.output, label_matrix_file)
    multi_label_classification(args.output, label_matrix_file)
    end_time_3 = timeit.default_timer()
    print('Run for testing %.2fs' % (end_time_3 - start_time_3))
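# Assumed entry point for running the module as a script (not shown in the
# original excerpt):
if __name__ == '__main__':
    main()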