def train():
    """Train a four-layer convolutional PathNet on two image tasks in sequence.

    The routine loads SVHN and CIFAR-10, builds one TensorFlow graph
    (conv -> res-fire -> dimensionality reduction -> conv -> dense output)
    whose modules are gated by ``geopath``, and then runs two evolutionary
    tournaments:

    * Task 1 trains on the first dataset (CIFAR-10 when
      ``FLAGS.cifar_first`` is set, otherwise SVHN).  The modules switched
      on in the last winning path are then marked as fixed.
    * Task 2 re-initialises every variable, restores the fixed modules from
      a backup, and trains the remaining modules on the other dataset while
      the fixed modules are always switched on.

    The graph hard-codes layer indices 0..3, so ``FLAGS.L`` is expected to
    be 4.  Results are printed; nothing is returned.

    Changes relative to the previous revision:

    * The fourth layer now consumes the output of the third layer.  It used
      to be wired to the raw input image, which left layers 1-3
      disconnected from the loss.
    * CIFAR-10 files are read inside ``with`` blocks so handles are closed.
    * Index shuffling uses ``np.random.permutation`` because shuffling a
      ``range`` object in place fails on Python 3.
    * The task-1 summary writers and the session are closed.
    * Dead code was removed: unused assign ops, an unused ``tmp`` lookup,
      and test-split aliases that were never read.
    """
    # ------------------------------------------------------------------ data
    svhn_maybe_download_and_extract()
    tr_data_svhn, tr_label_svhn = _load_svhn_split(
        os.path.join(FLAGS.svhn_data_dir, "train_32x32.mat"))

    cifar10.maybe_download_and_extract()
    cifar_train_batches = [
        _load_cifar10_batch(
            os.path.join(FLAGS.cifar_data_dir,
                         "data_batch_" + str(i) + ".bin"))
        for i in range(1, 6)
    ]
    tr_data_cifar10 = np.concatenate(
        [batch[0] for batch in cifar_train_batches], axis=0)
    tr_label_cifar10 = np.concatenate(
        [batch[1] for batch in cifar_train_batches], axis=0)

    # The test batch is only used for the two diagnostic prints below; this
    # routine never evaluates on a test split.
    _, ts_label_cifar10 = _load_cifar10_batch(
        os.path.join(FLAGS.cifar_data_dir, "test_batch.bin"))
    print(ts_label_cifar10.shape)
    print(ts_label_cifar10[0])

    if FLAGS.cifar_first:
        tr_data1, tr_label1 = tr_data_cifar10, tr_label_cifar10
        tr_data2, tr_label2 = tr_data_svhn, tr_label_svhn
    else:
        tr_data1, tr_label1 = tr_data_svhn, tr_label_svhn
        tr_data2, tr_label2 = tr_data_cifar10, tr_label_cifar10

    # ----------------------------------------------------------------- graph
    sess = tf.InteractiveSession()

    with tf.name_scope('input'):
        x = tf.placeholder(tf.float32, [None, 32 * 32 * 3], name='x-input')
        y_ = tf.placeholder(tf.float32, [None, 10], name='y-input')

    with tf.name_scope('input_reshape'):
        image_shaped_input = tf.reshape(x, [-1, 32, 32, 3])
        tf.summary.image('input', image_shaped_input, 2)

    # One gate per (layer, module).
    geopath = pathnet.geopath_initializer(FLAGS.L, FLAGS.M)

    # '0' = module is still trainable, '1' = module is frozen.
    fixed_list = np.full((FLAGS.L, FLAGS.M), '0', dtype=str)

    weights_list = np.zeros((FLAGS.L, FLAGS.M), dtype=object)
    biases_list = np.zeros((FLAGS.L, FLAGS.M), dtype=object)

    def add_layer(layer_idx, layer_input, build_module):
        """Build the M modules of one layer and return their average."""
        outputs = np.zeros(FLAGS.M, dtype=object)
        for j in range(FLAGS.M):
            name = 'layer' + str(layer_idx + 1) + "_" + str(j + 1)
            (outputs[j], weights_list[layer_idx, j],
             biases_list[layer_idx, j]) = build_module(
                 layer_input, geopath[layer_idx, j], name)
        return np.sum(outputs) / FLAGS.M

    def conv_builder(layer_input, gate, name):
        return pathnet.conv_module(layer_input, FLAGS.filt, [5, 5], gate, 1,
                                   name)

    def res_fire_builder(layer_input, gate, name):
        return pathnet.res_fire_layer(layer_input, FLAGS.filt, 10, 10, gate,
                                      name)

    def reduction_builder(layer_input, gate, name):
        return pathnet.Dimensionality_reduction_module(layer_input, 10, gate,
                                                       name)

    net = add_layer(0, image_shaped_input, conv_builder)
    net = add_layer(1, net, res_fire_builder)
    net = add_layer(2, net, reduction_builder)
    # Feed the previous layer's output here.  Using the raw image (as the
    # earlier revision did) discards everything layers 1-3 computed.
    net = add_layer(3, net, conv_builder)

    # Flatten every non-batch dimension before the dense output layer.
    flat_length = 1
    for dim in net.shape[1:]:
        flat_length *= int(dim)
    net = tf.reshape(net, [-1, flat_length])

    y, output_weights, output_biases = pathnet.nn_layer(
        net, 10, 'output_layer')

    with tf.name_scope('cross_entropy'):
        diff = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
        with tf.name_scope('total'):
            cross_entropy = tf.reduce_mean(diff)
    tf.summary.scalar('cross_entropy', cross_entropy)

    def collect_variables(flag):
        """Return the variables of all modules whose fixed_list entry is flag."""
        collected = []
        for i in range(FLAGS.L):
            for j in range(FLAGS.M):
                if fixed_list[i, j] == flag:
                    collected += weights_list[i, j] + biases_list[i, j]
        return collected

    var_list_to_learn = ([] + output_weights + output_biases +
                         collect_variables('0'))

    with tf.name_scope('train'):
        train_step = tf.train.GradientDescentOptimizer(
            FLAGS.learning_rate).minimize(cross_entropy,
                                          var_list=var_list_to_learn)

    with tf.name_scope('accuracy'):
        with tf.name_scope('correct_prediction'):
            correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        with tf.name_scope('accuracy'):
            accuracy = tf.reduce_mean(
                tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', accuracy)

    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(FLAGS.log_dir + '/train1',
                                         sess.graph)
    test_writer = tf.summary.FileWriter(FLAGS.log_dir + '/test1')
    tf.global_variables_initializer().run()

    # Placeholders and assign ops used to push a candidate path into the
    # gate variables.
    geopath_update_ops = np.zeros((len(geopath), len(geopath[0])),
                                  dtype=object)
    geopath_update_placeholders = np.zeros((len(geopath), len(geopath[0])),
                                           dtype=object)
    for i in range(len(geopath)):
        for j in range(len(geopath[0])):
            geopath_update_placeholders[i, j] = tf.placeholder(
                geopath[i, j].dtype, shape=geopath[i, j].get_shape())
            geopath_update_ops[i, j] = geopath[i, j].assign(
                geopath_update_placeholders[i, j])

    def run_tournament(tr_data, tr_label, train_op, summary_op, writer):
        """Evolve a fresh population of paths and return (accuracy, path).

        Each of the ``FLAGS.max_steps`` generations picks ``FLAGS.B`` random
        candidates, trains each for ``FLAGS.T`` mini-batches, and overwrites
        the losers with mutated copies of the most accurate one.  Modules
        marked '1' in ``fixed_list`` are forced on for every candidate.
        """
        geopath_set = np.zeros(FLAGS.candi, dtype=object)
        for c in range(FLAGS.candi):
            geopath_set[c] = pathnet.get_geopath(FLAGS.L, FLAGS.M, FLAGS.N)

        acc_geo = np.zeros(FLAGS.B, dtype=float)
        summary_geo = np.zeros(FLAGS.B, dtype=object)
        for step in range(FLAGS.max_steps):
            compet_idx = np.random.permutation(FLAGS.candi)[:FLAGS.B]

            for j, candidate_idx in enumerate(compet_idx):
                order = np.random.permutation(len(tr_data))
                tr_data = tr_data[order]
                tr_label = tr_label[order]

                candidate_path = np.copy(geopath_set[candidate_idx])
                for l in range(FLAGS.L):
                    for m in range(FLAGS.M):
                        if fixed_list[l, m] == '1':
                            candidate_path[l, m] = 1.0
                pathnet.geopath_insert(sess, geopath_update_placeholders,
                                       geopath_update_ops, candidate_path,
                                       FLAGS.L, FLAGS.M)

                acc_total = 0
                for k in range(FLAGS.T):
                    lo = k * FLAGS.batch_num
                    hi = (k + 1) * FLAGS.batch_num
                    summary_tr, _, acc_tmp = sess.run(
                        [summary_op, train_op, accuracy],
                        feed_dict={
                            x: tr_data[lo:hi, :],
                            y_: tr_label[lo:hi, :]
                        })
                    acc_total += acc_tmp
                acc_geo[j] = acc_total / FLAGS.T
                summary_geo[j] = summary_tr

            winner_idx = np.argmax(acc_geo)
            acc = acc_geo[winner_idx]

            for j in range(len(compet_idx)):
                if j != winner_idx:
                    geopath_set[compet_idx[j]] = np.copy(
                        geopath_set[compet_idx[winner_idx]])
                    geopath_set[compet_idx[j]] = pathnet.mutation(
                        geopath_set[compet_idx[j]], FLAGS.L, FLAGS.M,
                        FLAGS.N)

            writer.add_summary(summary_geo[winner_idx], step)
            print('Training Accuracy at step %s: %s' % (step, acc))
        return acc, geopath_set[compet_idx[winner_idx]]

    # ---------------------------------------------------------------- TASK 1
    acc_task1, task1_optimal_path = run_tournament(
        tr_data1, tr_label1, train_step, merged, train_writer)

    # Freeze every module that is switched on in the task-1 winning path.
    for i in range(FLAGS.L):
        for j in range(FLAGS.M):
            if task1_optimal_path[i, j] == 1.0:
                fixed_list[i, j] = '1'

    # Snapshot the frozen modules so they survive the re-initialisation
    # performed before task 2.
    var_list_to_fix = collect_variables('1')
    var_list_fix = pathnet.parameters_backup(var_list_to_fix)

    var_fix_ops = np.zeros(len(var_list_to_fix), dtype=object)
    var_fix_placeholders = np.zeros(len(var_list_to_fix), dtype=object)
    for i in range(len(var_list_to_fix)):
        var_fix_placeholders[i] = tf.placeholder(
            var_list_to_fix[i].dtype, shape=var_list_to_fix[i].get_shape())
        var_fix_ops[i] = var_list_to_fix[i].assign(var_fix_placeholders[i])

    # ---------------------------------------------------------------- TASK 2
    var_list_to_learn = ([] + output_weights + output_biases +
                         collect_variables('0'))

    # Close the task-1 writers before their names are rebound; otherwise
    # they are never closed.
    train_writer.close()
    test_writer.close()

    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(FLAGS.log_dir + '/train2',
                                         sess.graph)
    test_writer = tf.summary.FileWriter(FLAGS.log_dir + '/test2')
    tf.global_variables_initializer().run()

    # Put the frozen task-1 weights back after the global re-initialisation.
    pathnet.parameters_update(sess, var_fix_placeholders, var_fix_ops,
                              var_list_fix)

    with tf.name_scope('train'):
        train_step = tf.train.GradientDescentOptimizer(
            FLAGS.learning_rate).minimize(cross_entropy,
                                          var_list=var_list_to_learn)

    acc_task2, _ = run_tournament(tr_data2, tr_label2, train_step, merged,
                                  train_writer)

    if FLAGS.cifar_first:
        print("CIFAR10_SVHN,TASK1:" + str(acc_task1) + ",TASK2:" +
              str(acc_task2) + ",Done")
    else:
        print("SVHN_CIFAR10,TASK1:" + str(acc_task1) + ",TASK2:" +
              str(acc_task2) + ",Done")

    train_writer.close()
    test_writer.close()
    sess.close()


def _load_svhn_split(file_name):
    """Load one SVHN ``.mat`` file as flattened images and one-hot labels.

    Returns:
        ``(data, labels)``.  ``data`` has one row of ``32 * 32 * 3`` values
        per example, divided by 255.  ``labels`` has ten columns with a
        single 1.0 per row, at column ``stored_label - 1``.
    """
    mat = sio.loadmat(file_name)
    num_examples = len(mat['y'])
    data = np.zeros((num_examples, 32 * 32 * 3), dtype=float)
    labels = np.zeros((num_examples, 10), dtype=float)
    for i in range(num_examples):
        # Examples are indexed along the last axis of 'X'.
        data[i] = np.reshape(mat['X'][:, :, :, i], [32 * 32 * 3])
        labels[i, mat['y'][i][0] - 1] = 1.0
    return data / 255.0, labels


def _load_cifar10_batch(file_name):
    """Load one CIFAR-10 binary batch as scaled pixel rows and one-hot labels.

    The file is read as 10000 records of 3073 bytes each: one label byte
    followed by 3072 pixel bytes.

    Returns:
        ``(data, labels)``.  ``data`` holds the pixel bytes divided by 255
        and ``labels`` is the ten-column one-hot encoding of the label byte.
    """
    with open(file_name, "rb") as f:
        records = np.reshape(bytearray(f.read()), [10000, 3073])
    labels = np.zeros((len(records), 10), dtype=float)
    labels[np.arange(len(records)), records[:, 0]] = 1.0
    return records[:, 1:] / 255.0, labels
def train(tr_data_cifar10, tr_label_cifar10, data_num_len_cifar10,
          ts_data_cifar10, ts_label_cifar10, ts_num_len_cifar10, candidate,
          max_steps):
    """Build the network described by ``candidate`` and train it.

    The network is one conv layer, then the feature layers listed in
    ``candidate.feature_layer_array`` (0 selects a res-fire layer, any other
    value a dimensionality-reduction layer), then fully connected layers and
    a 10-way output layer.  Every layer has ``candidate.module_num`` modules,
    and each module owns two learnable scalars: one scales its input, the
    other its contribution to the layer average.

    Args:
        tr_data_cifar10: Training images, one flattened 32*32*3 row each.
        tr_label_cifar10: One-hot training labels with ten columns.
        data_num_len_cifar10: Number of training examples; it determines how
            many mini-batches make one pass over the data.
        ts_data_cifar10: Test images.  The periodic evaluation reads the
            first 100 * 100 rows.
        ts_label_cifar10: One-hot test labels.
        ts_num_len_cifar10: Unused; kept for interface compatibility.
        candidate: Architecture description.  Its ``disable_mask`` is
            updated in place when a dimensionality-reduction layer is
            skipped because its input is already 1x1.
        max_steps: Total number of training mini-batches to run.

    Returns:
        The mean training-batch accuracy over all ``max_steps`` steps.

    Changes relative to the previous revision:

    * The weighted module average no longer uses ``lambda (a, b): ...``,
      which is Python-2-only syntax and a SyntaxError on Python 3.
    * Index shuffling uses ``np.random.permutation`` because shuffling a
      ``range`` object in place fails on Python 3.
    * The session is closed in a ``finally`` clause so it is released even
      when training raises.
    """
    tr_data1 = tr_data_cifar10
    tr_label1 = tr_label_cifar10
    data_num_len1 = data_num_len_cifar10
    # Whole mini-batches per pass; keeps batch slices inside the data.
    max_data_len = int(data_num_len1 / FLAGS.batch_num)
    ts_data1 = ts_data_cifar10
    ts_label1 = ts_label_cifar10

    # +1 accounts for the first conv layer.
    L = int(candidate.feature_layer_num + candidate.fc_layer_num + 1)
    M = int(candidate.module_num)
    # Doubled so the filter count is always even.
    F = int(candidate.filter_num) * 2
    FL = candidate.feature_layer_array

    def weighted_mean(modules, module_weights):
        """Average the module outputs, each scaled by its first scalar."""
        total = None
        for module_out, weights in zip(modules, module_weights):
            term = module_out * weights[0]
            total = term if total is None else total + term
        return total / M

    sess = tf.InteractiveSession()
    try:
        with tf.name_scope('input'):
            x = tf.placeholder(tf.float32, [None, 32 * 32 * 3],
                               name='x-input')
            y_ = tf.placeholder(tf.float32, [None, 10], name='y-input')
            keep_prob = tf.placeholder(tf.float32)

        with tf.name_scope('input_reshape'):
            image_shaped_input = tf.reshape(x, [-1, 32, 32, 3])

        geopath = pathnet.geopath_initializer(L, M)

        # '0' = module is trainable.  Nothing is frozen in this routine.
        fixed_list = np.full((L, M), '0', dtype=str)

        weights_list = np.zeros((L, M), dtype=object)
        biases_list = np.zeros((L, M), dtype=object)
        # Per module: [output scale, input scale], both initialised near 1.
        sum_weights_list = np.zeros((L, M), dtype=object)
        for i in range(L):
            for j in range(M):
                _initial1 = tf.truncated_normal(shape=[1], mean=1,
                                                stddev=0.1)
                _initial2 = tf.truncated_normal(shape=[1], mean=1,
                                                stddev=0.1)
                sum_weights_list[i, j] = [
                    tf.Variable(_initial1),
                    tf.Variable(_initial2)
                ]

        layer_modules_list = np.zeros(M, dtype=object)

        # First layer: conv.
        for j in range(M):
            (layer_modules_list[j], weights_list[0, j],
             biases_list[0, j]) = pathnet.conv_module(
                 sum_weights_list[0][j][1] * image_shaped_input, F, [5, 5],
                 geopath[0, j], 1,
                 'conv_layer' + str(0 + 1) + "_" + str(j + 1), keep_prob)
        net = weighted_mean(layer_modules_list, sum_weights_list[0])

        # Feature abstraction layers.
        for i in range(len(FL)):
            if FL[i] == 0:
                for j in range(M):
                    (layer_modules_list[j], weights_list[i + 1, j],
                     biases_list[i + 1, j]) = pathnet.res_fire_layer(
                         sum_weights_list[i + 1, j][1] * net,
                         geopath[i + 1, j],
                         'res_fire_layer' + str(i + 2) + "_" + str(j + 1),
                         keep_prob)
            else:
                # A 1x1 feature map cannot be reduced further: record the
                # layer as disabled and leave ``net`` untouched.
                if (int(net.get_shape()[1]) == 1 and
                        int(net.get_shape()[2]) == 1):
                    candidate.disable_mask[i] = 1
                    continue
                for j in range(M):
                    (layer_modules_list[j], weights_list[i + 1, j],
                     biases_list[i + 1, j]
                     ) = pathnet.Dimensionality_reduction_module(
                         sum_weights_list[i + 1, j][1] * net,
                         geopath[i + 1, j], 'dimension_reduction_layer' +
                         str(i + 2) + "_" + str(j + 1))
            net = weighted_mean(layer_modules_list, sum_weights_list[i + 1])

        # Flatten every non-batch dimension before the dense layers.
        _length = 1
        for _i in net.shape[1:]:
            _length *= int(_i)
        net = tf.reshape(net, [-1, _length])

        # Fully connected layers.
        for i in range(len(FL) + 1, L):
            for j in range(M):
                (layer_modules_list[j], weights_list[i, j],
                 biases_list[i, j]) = pathnet.fc_layer(
                     sum_weights_list[i][j][1] * net, F, geopath[i, j],
                     'fc_layer' + str(i + 1) + "_" + str(j + 1))
            net = weighted_mean(layer_modules_list, sum_weights_list[i])

        # ``i`` deliberately keeps the value left by the last loop that ran,
        # exactly as before, so the scope name of the output layer does not
        # change.
        y, output_weights, output_biases = pathnet.nn_layer(
            net, 10, 'output_layer' + str(i))

        with tf.name_scope('cross_entropy'):
            diff = tf.nn.softmax_cross_entropy_with_logits(labels=y_,
                                                           logits=y)
            with tf.name_scope('total'):
                cross_entropy = tf.reduce_mean(diff)

        var_list_to_learn = [] + output_weights + output_biases
        for i in range(L):
            # A disabled feature layer was never built, so it has no
            # variables to learn.
            if (i > 0 and i < candidate.maxFr + 1 and
                    candidate.disable_mask[i - 1] == 1):
                continue
            for j in range(M):
                if fixed_list[i, j] == '0':
                    var_list_to_learn += (weights_list[i, j] +
                                          biases_list[i, j] +
                                          sum_weights_list[i, j])

        with tf.name_scope('train'):
            train_step = tf.train.AdamOptimizer(
                FLAGS.learning_rate).minimize(cross_entropy,
                                              var_list=var_list_to_learn)

        with tf.name_scope('accuracy'):
            with tf.name_scope('correct_prediction'):
                correct_prediction = tf.equal(tf.argmax(y, 1),
                                              tf.argmax(y_, 1))
            with tf.name_scope('accuracy'):
                accuracy = tf.reduce_mean(
                    tf.cast(correct_prediction, tf.float32))

        tf.global_variables_initializer().run()

        # Split max_steps into full passes over the data plus a remainder,
        # so that the data can be reshuffled between passes.
        step_list = ([max_data_len] * int(max_steps / max_data_len) +
                     [max_steps % max_data_len])
        counter = 0
        acc_geo_tr = 0
        for s in step_list:
            order = np.random.permutation(len(tr_data1))
            tr_data1 = tr_data1[order]
            tr_label1 = tr_label1[order]
            for k in range(s):
                lo = k * FLAGS.batch_num
                hi = (k + 1) * FLAGS.batch_num
                _, acc_geo_tmp = sess.run(
                    [train_step, accuracy],
                    feed_dict={
                        x: tr_data1[lo:hi, :],
                        y_: tr_label1[lo:hi, :],
                        keep_prob: FLAGS.dropout
                    })
                acc_geo_tr += acc_geo_tmp
                counter += 1
                if counter > 100 and counter % 1000 == 0:
                    print("step %d, single_acc %f" % (counter, acc_geo_tmp))
                # Periodic evaluation on 100 test batches of 100 examples,
                # with dropout disabled.
                if counter % 100 == 0:
                    test_acc = []
                    for b in range(100):
                        test_acc += [
                            sess.run(accuracy,
                                     feed_dict={
                                         x: ts_data1[b * 100:(b + 1) * 100, :],
                                         y_: ts_label1[b * 100:(b + 1) * 100, :],
                                         keep_prob: 1
                                     })
                        ]
                    print("step %d, acc_on_test_set %f" %
                          (counter, sum(test_acc) / 100))
        return acc_geo_tr / max_steps
    finally:
        sess.close()
def train():
    """Train a fully connected PathNet on CIFAR-10, then transfer to SVHN.

    Both tasks share ``FLAGS.L`` layers of ``FLAGS.M`` dense modules gated
    by ``geopath``; each task has its own 10-way output layer.  A task is
    solved with a binary tournament: two random candidate paths are each
    trained for ``FLAGS.T`` mini-batches of 16 examples from the same
    starting weights, both are scored on the task's test set, and the loser
    is replaced by a mutated copy of the winner.

    After task 1 the modules switched on in the winning path are frozen and
    all other modules are re-initialised before task 2 starts.  Results are
    printed; nothing is returned.

    Changes relative to the previous revision:

    * SVHN training labels are no longer overwritten with zeros right after
      being filled in.
    * In task 2 the second candidate now uses ``geopath_set[second]``; it
      used to re-insert the first candidate's path.
    * In task 2 the second candidate now trains for ``FLAGS.T`` steps like
      the first.  It used to train ``T - 1`` steps, which skewed the
      comparison and raised NameError when T was 1.
    * Task 2 initialises only the variables created for task 2.  The former
      global re-initialisation also reset the frozen task-1 modules.
    * Data files are read inside ``with`` blocks, and the task-1 summary
      writers and the session are closed.
    """
    batch_size = 16

    # ------------------------------------------------------------------ data
    svhn_maybe_download_and_extract()
    tr_data_svhn, tr_label_svhn = _load_svhn_split(
        os.path.join(FLAGS.svhn_data_dir, "train_32x32.mat"))
    ts_data_svhn, ts_label_svhn = _load_svhn_split(
        os.path.join(FLAGS.svhn_data_dir, "test_32x32.mat"))

    cifar10.maybe_download_and_extract()
    cifar_train_batches = [
        _load_cifar10_batch(
            os.path.join(FLAGS.cifar_data_dir,
                         "data_batch_" + str(i) + ".bin"))
        for i in range(1, 6)
    ]
    tr_data_cifar10 = np.concatenate(
        [batch[0] for batch in cifar_train_batches], axis=0)
    tr_label_cifar10 = np.concatenate(
        [batch[1] for batch in cifar_train_batches], axis=0)
    ts_data_cifar10, ts_label_cifar10 = _load_cifar10_batch(
        os.path.join(FLAGS.cifar_data_dir, "test_batch.bin"))

    data_num_len1 = len(tr_label_cifar10)
    data_num_len2 = len(tr_label_svhn)

    # ----------------------------------------------------------------- graph
    sess = tf.InteractiveSession()

    with tf.name_scope('input'):
        x = tf.placeholder(tf.float32, [None, 32 * 32 * 3], name='x-input')
        y_ = tf.placeholder(tf.float32, [None, 10], name='y-input')

    with tf.name_scope('input_reshape'):
        image_shaped_input = tf.reshape(x, [-1, 32, 32, 3])
        tf.summary.image('input', image_shaped_input, 10)

    geopath = pathnet.geopath_initializer(FLAGS.L, FLAGS.M)

    # '1' in fixed_list = frozen after task 1.
    # '1' in rein_list = re-initialised before task 2.
    fixed_list = np.full((FLAGS.L, FLAGS.M), '0', dtype=str)
    rein_list = np.full((FLAGS.L, FLAGS.M), '0', dtype=str)

    weights_list = np.zeros((FLAGS.L, FLAGS.M), dtype=object)
    biases_list = np.zeros((FLAGS.L, FLAGS.M), dtype=object)
    for i in range(FLAGS.L):
        # Only the first layer sees the flattened image.
        in_dim = 32 * 32 * 3 if i == 0 else FLAGS.filt
        for j in range(FLAGS.M):
            weights_list[i, j] = pathnet.module_weight_variable(
                [in_dim, FLAGS.filt])
            biases_list[i, j] = pathnet.module_bias_variable([FLAGS.filt])

    net = x
    for i in range(FLAGS.L):
        layer_modules_list = np.zeros(FLAGS.M, dtype=object)
        for j in range(FLAGS.M):
            layer_modules_list[j] = pathnet.module(
                net, weights_list[i, j], biases_list[i, j],
                'layer' + str(i + 1) + "_" + str(j + 1)) * geopath[i, j]
        net = np.sum(layer_modules_list)

    def module_variables(flag_list):
        """Return the variables of every module flagged '1' in flag_list."""
        collected = []
        for i in range(FLAGS.L):
            for j in range(FLAGS.M):
                if flag_list[i, j] == '1':
                    collected += weights_list[i, j] + biases_list[i, j]
        return collected

    def learnable_variables(head_weights, head_biases):
        """Return the head variables plus those of all unfrozen modules."""
        collected = [] + head_weights + head_biases
        for i in range(FLAGS.L):
            for j in range(FLAGS.M):
                if fixed_list[i, j] == '0':
                    collected += weights_list[i, j] + biases_list[i, j]
        return collected

    def make_feed_fn(tr_data, tr_label, ts_data, ts_label):
        """Return a feed_dict builder bound to one task's data."""

        def feed(is_training, tr_flag=0):
            if is_training or FLAGS.fake_data:
                xs = tr_data[tr_flag:tr_flag + batch_size, :]
                ys = tr_label[tr_flag:tr_flag + batch_size, :]
            else:
                xs = ts_data
                ys = ts_label
            return {x: xs, y_: ys}

        return feed

    # Task-1 head.  Softmax is applied inside the loss, not here.
    output_weights = pathnet.module_weight_variable([FLAGS.filt, 10])
    output_biases = pathnet.module_bias_variable([10])
    y = pathnet.nn_layer(net, output_weights, output_biases, 'output_layer',
                         act=tf.identity)

    with tf.name_scope('cross_entropy'):
        diff = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
        with tf.name_scope('total'):
            cross_entropy = tf.reduce_mean(diff)
    tf.summary.scalar('cross_entropy', cross_entropy)

    var_list_to_learn = learnable_variables(output_weights, output_biases)

    with tf.name_scope('train'):
        train_step = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(
            cross_entropy, var_list=var_list_to_learn)

    with tf.name_scope('accuracy'):
        with tf.name_scope('correct_prediction'):
            correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        with tf.name_scope('accuracy'):
            accuracy = tf.reduce_mean(
                tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', accuracy)

    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(FLAGS.log_dir + '/train',
                                         sess.graph)
    test_writer = tf.summary.FileWriter(FLAGS.log_dir + '/test')
    tf.global_variables_initializer().run()

    # Placeholders and assign ops used to push a candidate path into the
    # gate variables; shared by both tasks.
    geopath_update_ops = np.zeros((len(geopath), len(geopath[0])),
                                  dtype=object)
    geopath_update_placeholders = np.zeros((len(geopath), len(geopath[0])),
                                           dtype=object)
    for i in range(len(geopath)):
        for j in range(len(geopath[0])):
            geopath_update_placeholders[i, j] = tf.placeholder(
                geopath[i, j].dtype, shape=geopath[i, j].get_shape())
            geopath_update_ops[i, j] = geopath[i, j].assign(
                geopath_update_placeholders[i, j])

    def run_tournament(var_list, train_op, accuracy_op, summary_op, feed_fn,
                       data_len, tr_writer, ts_writer, step_offset):
        """Run the binary tournament for one task.

        Returns ``(accuracy, path)`` of the winner of the final step.
        ``step_offset`` is added to the step number in the printed progress
        line only.
        """
        geopath_set = np.zeros(FLAGS.candi, dtype=object)
        for c in range(FLAGS.candi):
            geopath_set[c] = pathnet.get_geopath(FLAGS.L, FLAGS.M, FLAGS.N)

        # Placeholders and assign ops used to restore saved parameters.
        var_update_ops = np.zeros(len(var_list), dtype=object)
        var_update_placeholders = np.zeros(len(var_list), dtype=object)
        for v, variable in enumerate(var_list):
            var_update_placeholders[v] = tf.placeholder(
                variable.dtype, shape=variable.get_shape())
            var_update_ops[v] = variable.assign(var_update_placeholders[v])

        def train_and_score(path, tr_flag):
            """Train on ``path`` for T steps, then score on the test set."""
            pathnet.geopath_insert(sess, geopath_update_placeholders,
                                   geopath_update_ops, path, FLAGS.L,
                                   FLAGS.M)
            for _step in range(FLAGS.T):
                summary_tr, _op = sess.run(
                    [summary_op, train_op],
                    feed_dict=feed_fn(True, tr_flag))
                tr_flag = (tr_flag + batch_size) % data_len
            summary_ts, acc = sess.run([summary_op, accuracy_op],
                                       feed_dict=feed_fn(False))
            trained = pathnet.parameters_backup(var_list)
            return summary_tr, summary_ts, acc, trained, tr_flag

        tr_flag = 0
        for step in range(FLAGS.max_steps):
            first, second = pathnet.select_two_candi(FLAGS.candi)

            # Both candidates start from the same weights and see the same
            # mini-batches.
            start_params = pathnet.parameters_backup(var_list)
            sum1_tr, sum1_ts, acc1, params1, _flag = train_and_score(
                geopath_set[first], tr_flag)
            pathnet.parameters_update(sess, var_update_placeholders,
                                      var_update_ops, start_params)
            sum2_tr, sum2_ts, acc2, params2, tr_flag = train_and_score(
                geopath_set[second], tr_flag)

            if acc1 > acc2:
                winner, loser = first, second
                win_tr, win_ts, win_acc, win_params = (sum1_tr, sum1_ts,
                                                       acc1, params1)
            else:
                winner, loser = second, first
                win_tr, win_ts, win_acc, win_params = (sum2_tr, sum2_ts,
                                                       acc2, params2)

            geopath_set[loser] = np.copy(geopath_set[winner])
            # NOTE(review): the return value is ignored here, as before, so
            # this presumably mutates its argument in place.  Confirm
            # against pathnet.mutation.
            pathnet.mutation(geopath_set[loser], FLAGS.L, FLAGS.M, FLAGS.N)
            # Continue from the winner's trained weights.
            pathnet.parameters_update(sess, var_update_placeholders,
                                      var_update_ops, win_params)
            tr_writer.add_summary(win_tr, step)
            ts_writer.add_summary(win_ts, step)
            print('Accuracy at step %s: %s' % (step + step_offset, win_acc))
        return win_acc, geopath_set[winner]

    # ------------------------------------------------------ TASK 1 (CIFAR 10)
    task1_acc, task1_optimal_path = run_tournament(
        var_list_to_learn, train_step, accuracy, merged,
        make_feed_fn(tr_data_cifar10, tr_label_cifar10, ts_data_cifar10,
                     ts_label_cifar10), data_num_len1, train_writer,
        test_writer, 1)

    # ----------------------------------------------------------- TASK 2 (SVHN)
    # Freeze the winning path; everything else starts again from scratch.
    for i in range(FLAGS.L):
        for j in range(FLAGS.M):
            if task1_optimal_path[i, j] == 1.0:
                fixed_list[i, j] = '1'
            else:
                rein_list[i, j] = '1'
    tf.variables_initializer(var_list=module_variables(rein_list)).run()

    # Remember which variables already hold values so that only the ones
    # created for task 2 are initialised below.
    existing_var_names = set(v.name for v in tf.global_variables())

    output_weights2 = pathnet.module_weight_variable([FLAGS.filt, 10])
    output_biases2 = pathnet.module_bias_variable([10])
    y2 = pathnet.nn_layer(net, output_weights2, output_biases2,
                          'output_layer2', act=tf.identity)

    with tf.name_scope('cross_entropy2'):
        diff2 = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y2)
        with tf.name_scope('total2'):
            cross_entropy2 = tf.reduce_mean(diff2)
    tf.summary.scalar('cross_entropy2', cross_entropy2)

    var_list_to_learn = learnable_variables(output_weights2, output_biases2)

    with tf.name_scope('train2'):
        train_step2 = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(
            cross_entropy2, var_list=var_list_to_learn)

    with tf.name_scope('accuracy2'):
        with tf.name_scope('correct_prediction2'):
            correct_prediction2 = tf.equal(tf.argmax(y2, 1),
                                           tf.argmax(y_, 1))
        with tf.name_scope('accuracy2'):
            accuracy2 = tf.reduce_mean(
                tf.cast(correct_prediction2, tf.float32))
    tf.summary.scalar('accuracy2', accuracy2)

    # Close the task-1 writers before their names are rebound; otherwise
    # they are never closed.
    train_writer.close()
    test_writer.close()

    merged2 = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(FLAGS.log_dir + '/train2',
                                         sess.graph)
    test_writer = tf.summary.FileWriter(FLAGS.log_dir + '/test2')

    # Initialise only the task-2 head and the new optimizer's variables.
    # A global re-initialisation here would also reset the frozen task-1
    # modules and defeat the transfer.
    new_variables = [
        v for v in tf.global_variables()
        if v.name not in existing_var_names
    ]
    tf.variables_initializer(new_variables).run()

    task2_acc, _ = run_tournament(
        var_list_to_learn, train_step2, accuracy2, merged2,
        make_feed_fn(tr_data_svhn, tr_label_svhn, ts_data_svhn,
                     ts_label_svhn), data_num_len2, train_writer,
        test_writer, 0)

    print("CIFAR10 Acc:" + str(task1_acc) + ",SVHN:" + str(task2_acc))

    train_writer.close()
    test_writer.close()
    sess.close()


def _load_svhn_split(file_name):
    """Load one SVHN ``.mat`` file as flattened images and one-hot labels.

    Returns:
        ``(data, labels)``.  ``data`` has one row of ``32 * 32 * 3`` values
        per example, divided by 255.  ``labels`` has ten columns with a
        single 1.0 per row, at column ``stored_label - 1``.
    """
    mat = sio.loadmat(file_name)
    num_examples = len(mat['y'])
    data = np.zeros((num_examples, 32 * 32 * 3), dtype=float)
    labels = np.zeros((num_examples, 10), dtype=float)
    for i in range(num_examples):
        # Examples are indexed along the last axis of 'X'.
        data[i] = np.reshape(mat['X'][:, :, :, i], [32 * 32 * 3])
        labels[i, mat['y'][i][0] - 1] = 1.0
    return data / 255.0, labels


def _load_cifar10_batch(file_name):
    """Load one CIFAR-10 binary batch as scaled pixel rows and one-hot labels.

    The file is read as 10000 records of 3073 bytes each: one label byte
    followed by 3072 pixel bytes.

    Returns:
        ``(data, labels)``.  ``data`` holds the pixel bytes divided by 255
        and ``labels`` is the ten-column one-hot encoding of the label byte.
    """
    with open(file_name, "rb") as f:
        records = np.reshape(bytearray(f.read()), [10000, 3073])
    labels = np.zeros((len(records), 10), dtype=float)
    labels[np.arange(len(records)), records[:, 0]] = 1.0
    return records[:, 1:] / 255.0, labels
def _add_pixel_noise(images):
    """Brighten a random subset of the pixels of ``images`` in place.

    One uniform sample in [0, 1) is drawn per pixel.  Where the sample is at
    least 0.5 it is added to the pixel and the sum is clipped to 1.0; other
    pixels are left untouched.

    Args:
        images: 2-D float array indexed as ``[example, pixel]``; modified in
            place.

    Returns:
        The same array object, so the call can be chained.
    """
    noise = np.random.rand(*images.shape)
    hit = noise >= 0.5
    images[hit] = np.minimum(images[hit] + noise[hit], 1.0)
    return images


def _two_class_batch(class0, class1, batch_num, start0, start1):
    """Build a mini-batch holding ``batch_num`` rows from each of two classes.

    Rows are read from ``start0`` / ``start1`` onwards; when a class runs out
    of rows the window wraps around to the beginning of that class.

    Args:
        class0: 2-D array with the examples of the first class.
        class1: 2-D array with the examples of the second class.
        batch_num: number of rows to take from each class.
        start0: integer read position inside ``class0``.
        start1: integer read position inside ``class1``.

    Returns:
        ``(xs, ys)`` where ``xs`` stacks the class-0 rows on top of the
        class-1 rows and ``ys`` is a ``(2 * batch_num, 2)`` one-hot label
        array (column 0 for the first ``batch_num`` rows, column 1 after).
    """

    def window(data, start):
        rows = data[start:start + batch_num, :]
        if len(rows) < batch_num:
            # Ran off the end of the data: continue from the first row.
            rows = np.append(
                rows, data[:(start + batch_num) % len(data), :], axis=0)
        return rows

    xs = np.append(window(class0, start0), window(class1, start1), axis=0)
    ys = np.zeros((batch_num * 2, 2), dtype=float)
    ys[:batch_num, 0] = 1.0
    ys[batch_num:, 1] = 1.0
    return xs, ys


def train():
    """Train a PathNet on two consecutive binary MNIST tasks.

    Task 1 separates digit ``FLAGS.a1`` from ``FLAGS.a2``; task 2 separates
    ``FLAGS.b1`` from ``FLAGS.b2``.  For each task a population of
    ``FLAGS.candi`` paths is evolved with two-candidate tournaments: both
    candidates are trained for ``FLAGS.T`` mini-batches, the loser is
    overwritten by a mutated copy of the winner, and the search stops once
    the winner's mean accuracy reaches 0.998 or ``FLAGS.max_steps``
    tournaments have been played.  The modules on the task-1 path are
    excluded from training in task 2 and their task-1 values are restored
    after re-initialisation.  The number of tournaments and the number of
    modules shared by both paths are printed at the end.

    If a task never reaches the accuracy target, the winner of its last
    tournament is used as that task's path.

    Raises:
        ValueError: if ``FLAGS.max_steps`` is smaller than 1.
    """
    if FLAGS.max_steps < 1:
        raise ValueError('FLAGS.max_steps must be at least 1')

    # Import data
    mnist = input_data.read_data_sets(
        FLAGS.data_dir, one_hot=True, fake_data=FLAGS.fake_data)
    total_tr_data, total_tr_label = mnist.train.next_batch(
        mnist.train._num_examples)

    def noisy_examples_of(digit):
        """Return a noisy copy of every training image labelled ``digit``."""
        # Boolean-mask indexing copies, so total_tr_data itself is untouched.
        return _add_pixel_noise(total_tr_data[total_tr_label[:, digit] == 1.0])

    tr_data_a1 = noisy_examples_of(FLAGS.a1)
    tr_data_a2 = noisy_examples_of(FLAGS.a2)
    tr_data_b1 = noisy_examples_of(FLAGS.b1)
    tr_data_b2 = noisy_examples_of(FLAGS.b2)

    ## TASK 1
    sess = tf.InteractiveSession()

    # Input placeholders
    with tf.name_scope('input'):
        x = tf.placeholder(tf.float32, [None, 784], name='x-input')
        y_ = tf.placeholder(tf.float32, [None, 2], name='y-input')

    with tf.name_scope('input_reshape'):
        image_shaped_input = tf.reshape(x, [-1, 28, 28, 1])
        tf.summary.image('input', image_shaped_input, 2)

    # geopath_examples
    geopath = pathnet.geopath_initializer(FLAGS.L, FLAGS.M)

    # fixed weights list: '0' = still trainable, '1' = frozen after task 1
    fixed_list = np.full((FLAGS.L, FLAGS.M), '0')

    # Hidden Layers
    weights_list = np.zeros((FLAGS.L, FLAGS.M), dtype=object)
    biases_list = np.zeros((FLAGS.L, FLAGS.M), dtype=object)
    for i in range(FLAGS.L):
        # Only the first layer reads the raw 784-pixel input.
        in_dim = 784 if i == 0 else FLAGS.filt
        for j in range(FLAGS.M):
            weights_list[i, j] = pathnet.module_weight_variable(
                [in_dim, FLAGS.filt])
            biases_list[i, j] = pathnet.module_bias_variable([FLAGS.filt])

    for i in range(FLAGS.L):
        layer_modules_list = np.zeros(FLAGS.M, dtype=object)
        for j in range(FLAGS.M):
            module_name = 'layer' + str(i + 1) + "_" + str(j + 1)
            if i == 0:
                layer_modules_list[j] = pathnet.module(
                    x, weights_list[i, j], biases_list[i, j],
                    module_name) * geopath[i, j]
            else:
                layer_modules_list[j] = pathnet.module2(
                    j, net, weights_list[i, j], biases_list[i, j],
                    module_name) * geopath[i, j]
        # Each module output is gated by its geopath entry, then summed.
        net = np.sum(layer_modules_list)

    # Output Layer
    output_weights = pathnet.module_weight_variable([FLAGS.filt, 2])
    output_biases = pathnet.module_bias_variable([2])
    y = pathnet.nn_layer(net, output_weights, output_biases, 'output_layer')

    # Cross Entropy
    with tf.name_scope('cross_entropy'):
        diff = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
        with tf.name_scope('total'):
            cross_entropy = tf.reduce_mean(diff)
    tf.summary.scalar('cross_entropy', cross_entropy)

    def trainable_variables():
        """Output-layer variables plus those of every non-frozen module."""
        variables = [] + output_weights + output_biases
        for i in range(FLAGS.L):
            for j in range(FLAGS.M):
                if fixed_list[i, j] == '0':
                    variables += weights_list[i, j] + biases_list[i, j]
        return variables

    # GradientDescent
    with tf.name_scope('train'):
        train_step = tf.train.GradientDescentOptimizer(
            FLAGS.learning_rate).minimize(cross_entropy,
                                          var_list=trainable_variables())

    # Accuracy
    with tf.name_scope('accuracy'):
        with tf.name_scope('correct_prediction'):
            correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        with tf.name_scope('accuracy'):
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', accuracy)

    # Merge all the summaries and write them out to FLAGS.log_dir
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(FLAGS.log_dir + '/train', sess.graph)
    test_writer = tf.summary.FileWriter(FLAGS.log_dir + '/test')
    tf.global_variables_initializer().run()

    # geopathes placeholders and ops
    geopath_update_ops = np.zeros((len(geopath), len(geopath[0])),
                                  dtype=object)
    geopath_update_placeholders = np.zeros((len(geopath), len(geopath[0])),
                                           dtype=object)
    for i in range(len(geopath)):
        for j in range(len(geopath[0])):
            geopath_update_placeholders[i, j] = tf.placeholder(
                geopath[i, j].dtype, shape=geopath[i, j].get_shape())
            geopath_update_ops[i, j] = geopath[i, j].assign(
                geopath_update_placeholders[i, j])

    # Rows taken from each class per mini-batch.  The read positions advance
    # by this same integer so they can never drift from the batch size.
    half_batch = int(FLAGS.batch_num / 2)

    def random_population():
        """Draw FLAGS.candi fresh candidate paths."""
        population = np.zeros(FLAGS.candi, dtype=object)
        for c in range(FLAGS.candi):
            population[c] = pathnet.get_geopath(FLAGS.L, FLAGS.M, FLAGS.N)
        return population

    def run_batches(fetches, class0, class1, cursor0, cursor1):
        """Run ``fetches`` on FLAGS.T consecutive two-class mini-batches.

        Returns the list of per-batch results and the advanced cursors.
        """
        results = []
        for _ in range(FLAGS.T):
            xs, ys = _two_class_batch(class0, class1, half_batch,
                                      cursor0, cursor1)
            results.append(sess.run(fetches, feed_dict={x: xs, y_: ys}))
            cursor0 = (cursor0 + half_batch) % len(class0)
            cursor1 = (cursor1 + half_batch) % len(class1)
        return results, cursor0, cursor1

    def settle_tournament(population, step, first, second, acc1, acc2,
                          summary1, summary2):
        """Replace the loser by a mutated copy of the winner and log the step.

        ``acc1`` / ``acc2`` are accuracies summed over FLAGS.T batches; the
        second candidate wins ties.  Returns the winner's path and its mean
        accuracy.
        """
        if acc1 > acc2:
            winner, loser, acc_sum, summary = first, second, acc1, summary1
        else:
            winner, loser, acc_sum, summary = second, first, acc2, summary2
        population[loser] = np.copy(population[winner])
        # pathnet.mutation's return value is stored, as task 1 always did;
        # NOTE(review): assumes it returns the mutated path - confirm.
        population[loser] = pathnet.mutation(population[loser], FLAGS.L,
                                             FLAGS.M, FLAGS.N)
        train_writer.add_summary(summary, step)
        mean_acc = acc_sum / FLAGS.T
        print('Training Accuracy at step %s: %s' % (step, mean_acc))
        return population[winner], mean_acc

    def announce(path):
        print('Learning Done!!')
        print('Optimal Path is as followed.')
        print(path)

    # Generating randomly geopath
    geopath_set = random_population()

    cursor_a1 = 0
    cursor_a2 = 0
    for i in range(FLAGS.max_steps):
        # Select Two Candidate to Tournament
        first, second = pathnet.select_two_candi(FLAGS.candi)

        # First Candidate: accuracy is measured while training.
        pathnet.geopath_insert(sess, geopath_update_placeholders,
                               geopath_update_ops, geopath_set[first],
                               FLAGS.L, FLAGS.M)
        runs, cursor_a1, cursor_a2 = run_batches(
            [merged, train_step, accuracy], tr_data_a1, tr_data_a2,
            cursor_a1, cursor_a2)
        summary_geo1_tr = runs[-1][0]
        acc_geo1_tr = sum(run[2] for run in runs)

        # Second Candidate: continues on the batches after the first one's.
        pathnet.geopath_insert(sess, geopath_update_placeholders,
                               geopath_update_ops, geopath_set[second],
                               FLAGS.L, FLAGS.M)
        runs, cursor_a1, cursor_a2 = run_batches(
            [merged, train_step, accuracy], tr_data_a1, tr_data_a2,
            cursor_a1, cursor_a2)
        summary_geo2_tr = runs[-1][0]
        acc_geo2_tr = sum(run[2] for run in runs)

        # Competition between two cases.  The winner is remembered on every
        # step so a path exists even if the target is never reached.
        task1_optimal_path, mean_acc = settle_tournament(
            geopath_set, i, first, second, acc_geo1_tr, acc_geo2_tr,
            summary_geo1_tr, summary_geo2_tr)
        if mean_acc >= 0.998:
            announce(task1_optimal_path)
            break
    iter_task1 = i

    # Fix task1 Optimal Path
    for i in range(FLAGS.L):
        for j in range(FLAGS.M):
            if task1_optimal_path[i, j] == 1.0:
                fixed_list[i, j] = '1'

    # Get variables of fixed list and snapshot their trained values
    var_list_to_fix = []
    for i in range(FLAGS.L):
        for j in range(FLAGS.M):
            if fixed_list[i, j] == '1':
                var_list_to_fix += weights_list[i, j] + biases_list[i, j]
    var_list_fix = pathnet.parameters_backup(var_list_to_fix)

    # parameters placeholders and ops used to write the snapshot back
    var_fix_ops = np.zeros(len(var_list_to_fix), dtype=object)
    var_fix_placeholders = np.zeros(len(var_list_to_fix), dtype=object)
    for k, variable in enumerate(var_list_to_fix):
        var_fix_placeholders[k] = tf.placeholder(
            variable.dtype, shape=variable.get_shape())
        var_fix_ops[k] = variable.assign(var_fix_placeholders[k])

    ## TASK 2
    # Need to learn variables: everything except the frozen modules
    var_list_to_learn = trainable_variables()

    # Initialization resets every variable ...
    tf.global_variables_initializer().run()

    # ... so the frozen modules get their task-1 values written back.
    pathnet.parameters_update(sess, var_fix_placeholders, var_fix_ops,
                              var_list_fix)

    # GradientDescent
    with tf.name_scope('train'):
        train_step = tf.train.GradientDescentOptimizer(
            FLAGS.learning_rate).minimize(cross_entropy,
                                          var_list=var_list_to_learn)

    def train_then_score(path, cursor0, cursor1):
        """Train ``path`` on FLAGS.T batches, then score it on the same ones.

        Returns the last training summary, the summed accuracy and the
        cursors positioned after the batches that were used.
        """
        pathnet.geopath_insert(sess, geopath_update_placeholders,
                               geopath_update_ops, path, FLAGS.L, FLAGS.M)
        trained, _, _ = run_batches([merged, train_step], tr_data_b1,
                                    tr_data_b2, cursor0, cursor1)
        scored, next0, next1 = run_batches([accuracy], tr_data_b1,
                                           tr_data_b2, cursor0, cursor1)
        return trained[-1][0], sum(run[0] for run in scored), next0, next1

    # Generating randomly geopath
    geopath_set = random_population()

    cursor_b1 = 0
    cursor_b2 = 0
    for i in range(FLAGS.max_steps):
        # Select Two Candidate to Tournament
        first, second = pathnet.select_two_candi(FLAGS.candi)

        # Both candidates start from the same read positions.
        summary_geo1_tr, acc_geo1_tr, _, _ = train_then_score(
            geopath_set[first], cursor_b1, cursor_b2)
        summary_geo2_tr, acc_geo2_tr, cursor_b1, cursor_b2 = train_then_score(
            geopath_set[second], cursor_b1, cursor_b2)

        # Competition between two cases
        task2_optimal_path, mean_acc = settle_tournament(
            geopath_set, i, first, second, acc_geo1_tr, acc_geo2_tr,
            summary_geo1_tr, summary_geo2_tr)
        if mean_acc >= 0.998:
            announce(task2_optimal_path)
            break
    iter_task2 = i

    # Count the modules that are active on both paths.
    overlap = 0
    for i in range(len(task1_optimal_path)):
        for j in range(len(task1_optimal_path[0])):
            if (task1_optimal_path[i, j] == task2_optimal_path[i, j]) & (
                    task1_optimal_path[i, j] == 1.0):
                overlap += 1
    print("Entire Iter:" + str(iter_task1 + iter_task2) + ",Overlap:" +
          str(overlap))

    train_writer.close()
    test_writer.close()
def _add_pixel_noise(images):
    """Brighten a random subset of the pixels of ``images`` in place.

    One uniform sample in [0, 1) is drawn per pixel.  Where the sample is at
    least 0.5 it is added to the pixel and the sum is clipped to 1.0; other
    pixels are left untouched.

    Args:
        images: 2-D float array indexed as ``[example, pixel]``; modified in
            place.

    Returns:
        The same array object, so the call can be chained.
    """
    noise = np.random.rand(*images.shape)
    hit = noise >= 0.5
    images[hit] = np.minimum(images[hit] + noise[hit], 1.0)
    return images


def train():
    """Train a convolutional PathNet on two consecutive binary MNIST tasks.

    Task 1 separates digit ``FLAGS.a1`` from ``FLAGS.a2``; task 2 separates
    ``FLAGS.b1`` from ``FLAGS.b2``.  For each task a population of
    ``FLAGS.candi`` paths is evolved with ``FLAGS.B``-way tournaments: every
    competitor is trained for ``FLAGS.T`` mini-batches on freshly shuffled
    data, the losers are overwritten by mutated copies of the most accurate
    competitor, and the search stops once that accuracy reaches 0.99 or
    ``FLAGS.max_steps`` tournaments have been played.  The modules on the
    task-1 path are excluded from training in task 2, restored to their
    task-1 values after re-initialisation and forced active in every task-2
    candidate.  Tournament counts and the number of modules shared by both
    paths are printed at the end.

    If a task never reaches the accuracy target, the winner of its last
    tournament is used as that task's path.

    Raises:
        ValueError: if ``FLAGS.max_steps`` is smaller than 1.
    """
    if FLAGS.max_steps < 1:
        raise ValueError('FLAGS.max_steps must be at least 1')

    # Import data
    mnist = input_data.read_data_sets(
        FLAGS.data_dir, one_hot=True, fake_data=FLAGS.fake_data)
    total_tr_data, total_tr_label = mnist.train.next_batch(
        mnist.train._num_examples)

    def noisy_examples_of(digit):
        """Return a noisy copy of every training image labelled ``digit``."""
        # Boolean-mask indexing copies, so total_tr_data itself is untouched.
        return _add_pixel_noise(total_tr_data[total_tr_label[:, digit] == 1.0])

    def stack_two_classes(class0, class1):
        """Stack two classes and build the matching one-hot label array."""
        data = np.append(class0, class1, axis=0)
        labels = np.zeros((len(data), 2), dtype=float)
        labels[:len(class0), 0] = 1.0
        labels[len(class0):, 1] = 1.0
        return data, labels

    tr_data_a1 = noisy_examples_of(FLAGS.a1)
    tr_data_a2 = noisy_examples_of(FLAGS.a2)
    tr_data_b1 = noisy_examples_of(FLAGS.b1)
    tr_data_b2 = noisy_examples_of(FLAGS.b2)

    tr_data1, tr_label1 = stack_two_classes(tr_data_a1, tr_data_a2)
    tr_data2, tr_label2 = stack_two_classes(tr_data_b1, tr_data_b2)

    ## TASK 1
    sess = tf.InteractiveSession()

    # Input placeholders
    with tf.name_scope('input'):
        x = tf.placeholder(tf.float32, [None, 784], name='x-input')
        y_ = tf.placeholder(tf.float32, [None, 2], name='y-input')

    with tf.name_scope('input_reshape'):
        image_shaped_input = tf.reshape(x, [-1, 28, 28, 1])
        tf.summary.image('input', image_shaped_input, 2)

    # geopath_examples
    geopath = pathnet.geopath_initializer(FLAGS.L, FLAGS.M)

    # fixed weights list: '0' = still trainable, '1' = frozen after task 1
    fixed_list = np.full((FLAGS.L, FLAGS.M), '0')

    # Hidden Layers (weights_list also records the conv kernels)
    weights_list = np.zeros((FLAGS.L, FLAGS.M), dtype=object)
    biases_list = np.zeros((FLAGS.L, FLAGS.M), dtype=object)

    def build_layer(i, make_module):
        """Create the FLAGS.M modules of layer ``i`` and average their outputs.

        ``make_module(gate, name)`` must return ``(output, weights, biases)``;
        the weights and biases are recorded in row ``i`` of the module tables.
        """
        modules = np.zeros(FLAGS.M, dtype=object)
        for j in range(FLAGS.M):
            modules[j], weights_list[i, j], biases_list[i, j] = make_module(
                geopath[i, j], 'layer' + str(i + 1) + "_" + str(j + 1))
        return np.sum(modules) / FLAGS.M

    # model define
    # Exactly four layers are built here, so rows 4 and up of the module
    # tables stay empty - presumably FLAGS.L is expected to be 4; confirm.
    # conv layer
    net = build_layer(
        0, lambda gate, name: pathnet.conv_module(
            image_shaped_input, FLAGS.filt, [5, 5], gate, 1, name))
    # res-fire layer
    res_fire_input = net
    net = build_layer(
        1, lambda gate, name: pathnet.res_fire_layer(
            res_fire_input, FLAGS.filt, 10, 10, gate, name))
    # dimensionality_reduction layer
    reduction_input = net
    net = build_layer(
        2, lambda gate, name: pathnet.Dimensionality_reduction_module(
            reduction_input, 10, gate, name))
    # conv layer
    # NOTE(review): this layer reads image_shaped_input rather than the
    # previous layer's `net`, so layers 1-3 do not feed the output.  This
    # looks like a copy-paste slip but is kept as-is; confirm the intent.
    net = build_layer(
        3, lambda gate, name: pathnet.conv_module(
            image_shaped_input, FLAGS.filt, [5, 5], gate, 1, name))

    # output layer
    # reshape: flatten everything but the batch dimension
    _length = 1
    for _dim in net.shape[1:]:
        _length *= int(_dim)
    net = tf.reshape(net, [-1, _length])

    # full connection layer
    y, output_weights, output_biases = pathnet.nn_layer(
        net, 2, 'output_layer')

    # Cross Entropy
    with tf.name_scope('cross_entropy'):
        diff = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
        with tf.name_scope('total'):
            cross_entropy = tf.reduce_mean(diff)
    tf.summary.scalar('cross_entropy', cross_entropy)

    def trainable_variables():
        """Output-layer variables plus those of every non-frozen module."""
        variables = [] + output_weights + output_biases
        for i in range(FLAGS.L):
            for j in range(FLAGS.M):
                if fixed_list[i, j] == '0':
                    variables += weights_list[i, j] + biases_list[i, j]
        return variables

    # GradientDescent
    with tf.name_scope('train'):
        train_step = tf.train.GradientDescentOptimizer(
            FLAGS.learning_rate).minimize(cross_entropy,
                                          var_list=trainable_variables())

    # Accuracy
    with tf.name_scope('accuracy'):
        with tf.name_scope('correct_prediction'):
            correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        with tf.name_scope('accuracy'):
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', accuracy)

    # Merge all the summaries and write them out to FLAGS.log_dir
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(FLAGS.log_dir + '/train1', sess.graph)
    test_writer = tf.summary.FileWriter(FLAGS.log_dir + '/test1')
    tf.global_variables_initializer().run()

    # geopathes placeholders and ops
    geopath_update_ops = np.zeros((len(geopath), len(geopath[0])),
                                  dtype=object)
    geopath_update_placeholders = np.zeros((len(geopath), len(geopath[0])),
                                           dtype=object)
    for i in range(len(geopath)):
        for j in range(len(geopath[0])):
            geopath_update_placeholders[i, j] = tf.placeholder(
                geopath[i, j].dtype, shape=geopath[i, j].get_shape())
            geopath_update_ops[i, j] = geopath[i, j].assign(
                geopath_update_placeholders[i, j])

    def evolve(train_op, summary_op, writer, data, labels, include_fixed):
        """Evolve a fresh population of paths on one task.

        Args:
            train_op: optimisation op run on every mini-batch.
            summary_op: merged summary op fetched with every mini-batch.
            writer: FileWriter receiving the winner's summary of each step.
            data: 2-D array of training images for the task.
            labels: one-hot labels matching ``data`` row by row.
            include_fixed: when true, every module marked '1' in
                ``fixed_list`` is switched on in each candidate before it is
                inserted into the network.

        Returns:
            ``(path, step)``: the winning path of the last tournament played
            and the index of that tournament.
        """
        # Generating randomly geopath
        population = np.zeros(FLAGS.candi, dtype=object)
        for c in range(FLAGS.candi):
            population[c] = pathnet.get_geopath(FLAGS.L, FLAGS.M, FLAGS.N)

        acc_geo = np.zeros(FLAGS.B, dtype=float)
        summary_geo = np.zeros(FLAGS.B, dtype=object)
        best_path = None
        for step in range(FLAGS.max_steps):
            # Select Candidates to Tournament.  A real list is required:
            # np.random.shuffle cannot shuffle a Python 3 range object.
            compet_idx = list(range(FLAGS.candi))
            np.random.shuffle(compet_idx)
            compet_idx = compet_idx[:FLAGS.B]

            # Learning & Evaluating
            for slot, candidate in enumerate(compet_idx):
                # Shuffle the data
                order = np.random.permutation(len(data))
                data = data[order]
                labels = labels[order]

                path = population[candidate]
                if include_fixed:
                    # Work on a copy so the stored candidate is not altered.
                    path = np.copy(path)
                    for l in range(FLAGS.L):
                        for m in range(FLAGS.M):
                            if fixed_list[l, m] == '1':
                                path[l, m] = 1.0

                # Insert Candidate
                pathnet.geopath_insert(sess, geopath_update_placeholders,
                                       geopath_update_ops, path, FLAGS.L,
                                       FLAGS.M)
                acc_geo_tr = 0
                for k in range(FLAGS.T):
                    lo = k * FLAGS.batch_num
                    hi = (k + 1) * FLAGS.batch_num
                    summary_geo_tr, _, acc_geo_tmp = sess.run(
                        [summary_op, train_op, accuracy],
                        feed_dict={x: data[lo:hi, :], y_: labels[lo:hi, :]})
                    acc_geo_tr += acc_geo_tmp
                acc_geo[slot] = acc_geo_tr / FLAGS.T
                summary_geo[slot] = summary_geo_tr

            # Tournament
            winner_idx = np.argmax(acc_geo)
            acc = acc_geo[winner_idx]
            winner = compet_idx[winner_idx]

            # Copy and Mutation
            for slot, candidate in enumerate(compet_idx):
                if slot != winner_idx:
                    population[candidate] = np.copy(population[winner])
                    population[candidate] = pathnet.mutation(
                        population[candidate], FLAGS.L, FLAGS.M, FLAGS.N)

            writer.add_summary(summary_geo[winner_idx], step)
            print('Training Accuracy at step %s: %s' % (step, acc))
            # Remembered on every step so a path exists even if the target
            # accuracy is never reached.
            best_path = population[winner]
            if acc >= 0.99:
                print('Learning Done!!')
                print('Optimal Path is as followed.')
                print(best_path)
                break
        return best_path, step

    # record the path and the steps needed to find it in task1
    task1_optimal_path, iter_task1 = evolve(
        train_step, merged, train_writer, tr_data1, tr_label1, False)

    # Fix task1 Optimal Path
    for i in range(FLAGS.L):
        for j in range(FLAGS.M):
            if task1_optimal_path[i, j] == 1.0:
                fixed_list[i, j] = '1'

    # Get variables of fixed list and snapshot their trained values
    var_list_to_fix = []
    for i in range(FLAGS.L):
        for j in range(FLAGS.M):
            if fixed_list[i, j] == '1':
                var_list_to_fix += weights_list[i, j] + biases_list[i, j]
    var_list_fix = pathnet.parameters_backup(var_list_to_fix)

    # parameters placeholders and ops used to write the snapshot back
    var_fix_ops = np.zeros(len(var_list_to_fix), dtype=object)
    var_fix_placeholders = np.zeros(len(var_list_to_fix), dtype=object)
    for k, variable in enumerate(var_list_to_fix):
        var_fix_placeholders[k] = tf.placeholder(
            variable.dtype, shape=variable.get_shape())
        var_fix_ops[k] = variable.assign(var_fix_placeholders[k])

    ## TASK 2
    # Need to learn variables: everything except the frozen modules
    var_list_to_learn = trainable_variables()

    # Close the task-1 writers before their names are rebound, so their
    # event files are flushed instead of being left open.
    train_writer.close()
    test_writer.close()

    # Initialization
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(FLAGS.log_dir + '/train2', sess.graph)
    test_writer = tf.summary.FileWriter(FLAGS.log_dir + '/test2')
    tf.global_variables_initializer().run()

    # Update fixed values: re-initialisation reset every variable, so the
    # frozen modules get their task-1 values written back.
    pathnet.parameters_update(sess, var_fix_placeholders, var_fix_ops,
                              var_list_fix)

    # GradientDescent
    with tf.name_scope('train'):
        train_step = tf.train.GradientDescentOptimizer(
            FLAGS.learning_rate).minimize(cross_entropy,
                                          var_list=var_list_to_learn)

    task2_optimal_path, iter_task2 = evolve(
        train_step, merged, train_writer, tr_data2, tr_label2, True)

    # Count the modules that are active on both paths.
    overlap = 0
    for i in range(len(task1_optimal_path)):
        for j in range(len(task1_optimal_path[0])):
            if (task1_optimal_path[i, j] == task2_optimal_path[i, j]) & (
                    task1_optimal_path[i, j] == 1.0):
                overlap += 1
    print("Entire Iter:" + str(iter_task1 + iter_task2) + ",TASK1:" +
          str(iter_task1) + ",TASK2:" + str(iter_task2) + ",Overlap:" +
          str(overlap))

    train_writer.close()
    test_writer.close()
def train():
    """Train PathNet on two ImageNet-derived tasks in sequence.

    Task 1 runs a tournament search over candidate paths while training
    every module.  The modules on the task-1 winning path are then marked
    as fixed: their parameters are backed up, all variables are
    re-initialised, and the backed-up values are written back.  Task 2
    repeats the tournament search with those modules forced active and
    excluded from the optimiser's variable list.  Finally the number of
    modules shared by both winning paths is printed.

    All configuration is read from the module-level ``FLAGS``.

    Raises:
        ValueError: if ``FLAGS.max_steps`` is smaller than 1 (no winning
            path could be produced).
    """
    if FLAGS.max_steps < 1:
        raise ValueError(
            'FLAGS.max_steps must be >= 1, got %s' % FLAGS.max_steps)

    # ImageNet file queues for task 1 and task 2.
    tr_data1, tr_label1 = imagenet_data.create_file_queue(
        FLAGS.imagenet_data_dir1)
    tr_data2, tr_label2 = imagenet_data.create_file_queue(
        FLAGS.imagenet_data_dir2)

    sess = tf.InteractiveSession()

    # Input placeholders: flattened 224x224 RGB images, 10-way one-hot labels.
    with tf.name_scope('input'):
        x = tf.placeholder(tf.float32, [None, 224 * 224 * 3], name='x-input')
        y_ = tf.placeholder(tf.float32, [None, 10], name='y-input')

    with tf.name_scope('input_reshape'):
        image_shaped_input = tf.reshape(x, [-1, 224, 224, 3])
        tf.summary.image('input', image_shaped_input, 2)

    # One gate per (layer, module), supplied by pathnet.
    geopath = pathnet.geopath_initializer(FLAGS.L, FLAGS.M)

    # '0' = module is trainable, '1' = module is frozen after task 1.
    fixed_list = np.full((FLAGS.L, FLAGS.M), '0', dtype=str)

    # Per-module variable lists, filled in while the layers are built.
    weights_list = np.zeros((FLAGS.L, FLAGS.M), dtype=object)
    biases_list = np.zeros((FLAGS.L, FLAGS.M), dtype=object)
    layer_modules_list = np.zeros(FLAGS.M, dtype=object)

    def add_layer(layer, build_module):
        """Build the M modules of one layer and return their averaged output."""
        for j in range(FLAGS.M):
            name = 'layer' + str(layer + 1) + "_" + str(j + 1)
            (layer_modules_list[j], weights_list[layer, j],
             biases_list[layer, j]) = build_module(geopath[layer, j], name)
        return np.sum(layer_modules_list) / FLAGS.M

    def module_variables(flag):
        """Collect weights and biases of every module whose fixed flag == flag."""
        variables = []
        for i in range(FLAGS.L):
            for j in range(FLAGS.M):
                if fixed_list[i, j] == flag:
                    variables += weights_list[i, j] + biases_list[i, j]
        return variables

    # Channel counts must be integers; plain '/' yields a float under true
    # division (Python 3 or `from __future__ import division`).
    half_filt = FLAGS.filt // 2

    # NOTE(review): the network below has exactly five gated layers
    # (indices 0..4), so this presumably expects FLAGS.L == 5 -- confirm.
    # conv layer
    net = add_layer(0, lambda gate, name: pathnet.conv_module(
        image_shaped_input, FLAGS.filt, [11, 11], gate, 1, name))
    # dimensionality_reduction layer
    net = add_layer(1, lambda gate, name: pathnet.Dimensionality_reduction_module(
        net, half_filt, gate, name))
    # res_fire layer
    net = add_layer(2, lambda gate, name: pathnet.res_fire_layer(
        net, half_filt, gate, name))
    # dimensionality_reduction layer
    net = add_layer(3, lambda gate, name: pathnet.Dimensionality_reduction_module(
        net, half_filt, gate, name))

    # Flatten everything but the batch axis before the fully connected layer.
    flat_length = 1
    for dim in net.shape[1:]:
        flat_length *= int(dim)
    net = tf.reshape(net, [-1, flat_length])

    # fully connected layer
    net = add_layer(4, lambda gate, name: pathnet.module(
        net, FLAGS.full_connection_filt, gate, name))

    # output layer
    y, output_weights, output_biases = pathnet.nn_layer(
        net, 10, 'output_layer')

    # Cross entropy
    with tf.name_scope('cross_entropy'):
        diff = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
        with tf.name_scope('total'):
            cross_entropy = tf.reduce_mean(diff)
    tf.summary.scalar('cross_entropy', cross_entropy)

    # Task 1 trains the output layer plus every (not yet fixed) module.
    var_list_to_learn = ([] + output_weights + output_biases
                         + module_variables('0'))

    # Gradient descent
    with tf.name_scope('train'):
        train_step = tf.train.GradientDescentOptimizer(
            FLAGS.learning_rate).minimize(cross_entropy,
                                          var_list=var_list_to_learn)

    # Accuracy
    with tf.name_scope('accuracy'):
        with tf.name_scope('correct_prediction'):
            correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        with tf.name_scope('accuracy'):
            accuracy = tf.reduce_mean(
                tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', accuracy)

    # Placeholders and assign ops used to load a candidate path into the gates.
    geopath_update_ops = np.zeros((len(geopath), len(geopath[0])),
                                  dtype=object)
    geopath_update_placeholders = np.zeros((len(geopath), len(geopath[0])),
                                           dtype=object)
    for i in range(len(geopath)):
        for j in range(len(geopath[0])):
            geopath_update_placeholders[i, j] = tf.placeholder(
                geopath[i, j].dtype, shape=geopath[i, j].get_shape())
            geopath_update_ops[i, j] = geopath[i, j].assign(
                geopath_update_placeholders[i, j])

    def run_task(task_data, task_label, data_dir, summary_op, step_op,
                 writer):
        """Run FLAGS.max_steps tournament rounds on one task.

        Returns the winning accuracy and winning path of the last round.
        """
        # Random initial population of candidate paths.
        geopath_set = np.zeros(FLAGS.candi, dtype=object)
        for c in range(FLAGS.candi):
            geopath_set[c] = pathnet.get_geopath(FLAGS.L, FLAGS.M, FLAGS.N)

        acc_geo = np.zeros(FLAGS.B, dtype=float)
        summary_geo = np.zeros(FLAGS.B, dtype=object)
        for step in range(FLAGS.max_steps):
            # Pick B random competitors.  np.random.shuffle(range(...)) fails
            # on Python 3 because range objects are immutable.
            compet_idx = np.random.permutation(FLAGS.candi)[:FLAGS.B]

            # Learning & evaluating
            for j, cand in enumerate(compet_idx):
                # Fixed modules are always switched on; during task 1 nothing
                # is fixed yet, so this is the candidate path unchanged.
                geopath_insert = np.copy(geopath_set[cand])
                for l in range(FLAGS.L):
                    for m in range(FLAGS.M):
                        if fixed_list[l, m] == '1':
                            geopath_insert[l, m] = 1.0
                pathnet.geopath_insert(sess, geopath_update_placeholders,
                                       geopath_update_ops, geopath_insert,
                                       FLAGS.L, FLAGS.M)
                acc_total = 0
                for _k in range(FLAGS.T):
                    batch_x, batch_y = imagenet_data.read_batch(
                        sess, task_data, task_label, FLAGS.batch_num,
                        data_dir)
                    summary_geo_tr, _, acc_geo_tmp = sess.run(
                        [summary_op, step_op, accuracy],
                        feed_dict={x: batch_x, y_: batch_y})
                    acc_total += acc_geo_tmp
                acc_geo[j] = acc_total / FLAGS.T
                summary_geo[j] = summary_geo_tr

            # Tournament
            winner_idx = np.argmax(acc_geo)
            winner = compet_idx[winner_idx]
            acc = acc_geo[winner_idx]

            # Losers are overwritten with a mutated copy of the winner.
            for j, cand in enumerate(compet_idx):
                if j != winner_idx:
                    geopath_set[cand] = pathnet.mutation(
                        np.copy(geopath_set[winner]),
                        FLAGS.L, FLAGS.M, FLAGS.N)

            writer.add_summary(summary_geo[winner_idx], step)
            print('Training Accuracy at step %s: %s' % (step, acc))
        return acc, geopath_set[winner]

    # ------------------------------ TASK 1 ------------------------------
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(FLAGS.log_dir + '/train1',
                                         sess.graph)
    test_writer = tf.summary.FileWriter(FLAGS.log_dir + '/test1')

    tf.global_variables_initializer().run()
    tf.local_variables_initializer().run()

    # Start the data reading queues.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
        acc_task1, task1_optimal_path = run_task(
            tr_data1, tr_label1, FLAGS.imagenet_data_dir1, merged,
            train_step, train_writer)
        print('Task1 Optimal Path is as followed.')
        print(task1_optimal_path)

        # Freeze every module on the task-1 winning path.
        for i in range(FLAGS.L):
            for j in range(FLAGS.M):
                if task1_optimal_path[i, j] == 1.0:
                    fixed_list[i, j] = '1'

        # Back up the frozen parameters and build ops to restore them.
        var_list_to_fix = module_variables('1')
        var_list_fix = pathnet.parameters_backup(var_list_to_fix)
        var_fix_ops = np.zeros(len(var_list_to_fix), dtype=object)
        var_fix_placeholders = np.zeros(len(var_list_to_fix), dtype=object)
        for i, variable in enumerate(var_list_to_fix):
            var_fix_placeholders[i] = tf.placeholder(
                variable.dtype, shape=variable.get_shape())
            var_fix_ops[i] = variable.assign(var_fix_placeholders[i])

        # ---------------------------- TASK 2 ----------------------------
        # Only the output layer and the non-frozen modules are trained.
        var_list_to_learn = ([] + output_weights + output_biases
                             + module_variables('0'))

        merged = tf.summary.merge_all()
        # Release the task-1 writers before rebinding the names; they were
        # previously leaked.
        train_writer.close()
        test_writer.close()
        train_writer = tf.summary.FileWriter(FLAGS.log_dir + '/train2',
                                             sess.graph)
        test_writer = tf.summary.FileWriter(FLAGS.log_dir + '/test2')

        # Re-initialise everything, then write the frozen values back.
        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()
        pathnet.parameters_update(sess, var_fix_placeholders, var_fix_ops,
                                  var_list_fix)

        # Gradient descent restricted to the task-2 variable list.
        with tf.name_scope('train'):
            train_step = tf.train.GradientDescentOptimizer(
                FLAGS.learning_rate).minimize(cross_entropy,
                                              var_list=var_list_to_learn)

        acc_task2, task2_optimal_path = run_task(
            tr_data2, tr_label2, FLAGS.imagenet_data_dir2, merged,
            train_step, train_writer)
        print('Task2 Optimal Path is as followed.')
        print(task2_optimal_path)
    finally:
        # Always flush the event files and stop the reader threads, even
        # when training fails part-way.
        train_writer.close()
        test_writer.close()
        coord.request_stop()
        coord.join(threads)

    # Number of modules that are active in both winning paths.
    overlap = int(np.sum((task1_optimal_path == task2_optimal_path)
                         & (task1_optimal_path == 1.0)))
    print("ImageNet,TASK1:" + str(acc_task1) + ",TASK2:" + str(acc_task2) +
          ", Overlap:" + str(overlap))
def train():
    """Train a gated multi-module MLP on a noisy two-digit MNIST task.

    Noisy binary-classification sets are built for the digit pairs (5, 6),
    (6, 7) and (8, 9); the (6, 7) pair is the one trained on.  The network
    has FLAGS.L layers of FLAGS.M modules, each module output multiplied by
    its gate from ``pathnet.geopath_initializer``.  No path search is done
    in this function: it trains with Adam on consecutive 16-sample slices
    until the mean accuracy over FLAGS.T batches reaches 0.998 or
    FLAGS.max_steps is exhausted, then prints the last step index.

    All configuration is read from the module-level ``FLAGS``.
    """
    # Import data
    mnist = input_data.read_data_sets(FLAGS.data_dir,
                                      one_hot=True,
                                      fake_data=FLAGS.fake_data)
    total_tr_data, total_tr_label = mnist.train.next_batch(
        mnist.train.num_examples)

    # All three pairs are generated, in this order, so the sequence of
    # random draws matches the historical per-pixel implementation.
    noisy_pairs = {}
    for first, second in ((5, 6), (6, 7), (8, 9)):
        noisy_pairs[(first, second)] = _noisy_digit_pair(
            total_tr_data, total_tr_label, first, second)

    # Task selection: swap the key to train on another digit pair.
    tr_data, tr_label = noisy_pairs[(6, 7)]
    data_num_len = len(tr_data)

    sess = tf.InteractiveSession()

    # Input placeholders: flattened 28x28 images, 2-way one-hot labels.
    with tf.name_scope('input'):
        x = tf.placeholder(tf.float32, [None, 784], name='x-input')
        y_ = tf.placeholder(tf.float32, [None, 2], name='y-input')

    with tf.name_scope('input_reshape'):
        image_shaped_input = tf.reshape(x, [-1, 28, 28, 1])
        tf.summary.image('input', image_shaped_input, 2)

    # One gate per (layer, module), supplied by pathnet.
    geopath = pathnet.geopath_initializer(FLAGS.L, FLAGS.M)

    # '0' = module is trainable.  Nothing is ever fixed in this function,
    # so every module ends up in the optimiser's variable list.
    fixed_list = np.full((FLAGS.L, FLAGS.M), '0', dtype=str)

    # Hidden-layer parameters: the first layer maps 784 -> filt, every
    # later layer maps filt -> filt.
    weights_list = np.zeros((FLAGS.L, FLAGS.M), dtype=object)
    biases_list = np.zeros((FLAGS.L, FLAGS.M), dtype=object)
    for i in range(FLAGS.L):
        in_dim = 784 if i == 0 else FLAGS.filt
        for j in range(FLAGS.M):
            weights_list[i, j] = pathnet.module_weight_variable(
                [in_dim, FLAGS.filt])
            biases_list[i, j] = pathnet.module_bias_variable([FLAGS.filt])

    # Each layer output is the sum of its gated module outputs.
    net = x
    for i in range(FLAGS.L):
        layer_modules_list = np.zeros(FLAGS.M, dtype=object)
        for j in range(FLAGS.M):
            layer_modules_list[j] = pathnet.module(
                net, weights_list[i, j], biases_list[i, j],
                'layer' + str(i + 1) + "_" + str(j + 1)) * geopath[i, j]
        net = np.sum(layer_modules_list)

    # Output layer produces raw logits (identity activation); softmax is
    # applied inside the cross-entropy op below.
    output_weights = pathnet.module_weight_variable([FLAGS.filt, 2])
    output_biases = pathnet.module_bias_variable([2])
    y = pathnet.nn_layer(net, output_weights, output_biases, 'output_layer',
                         act=tf.identity)

    with tf.name_scope('cross_entropy'):
        diff = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
        with tf.name_scope('total'):
            cross_entropy = tf.reduce_mean(diff)
    tf.summary.scalar('cross_entropy', cross_entropy)

    # Variables to learn: output layer plus every non-fixed module.
    var_list_to_learn = [] + output_weights + output_biases
    for i in range(FLAGS.L):
        for j in range(FLAGS.M):
            if fixed_list[i, j] == '0':
                var_list_to_learn += weights_list[i, j] + biases_list[i, j]

    with tf.name_scope('train'):
        train_step = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(
            cross_entropy, var_list=var_list_to_learn)

    with tf.name_scope('accuracy'):
        with tf.name_scope('correct_prediction'):
            correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        with tf.name_scope('accuracy'):
            accuracy = tf.reduce_mean(
                tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', accuracy)

    # Merge all the summaries and write them out under FLAGS.log_dir.
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(FLAGS.log_dir + '/train', sess.graph)
    test_writer = tf.summary.FileWriter(FLAGS.log_dir + '/test')
    tf.global_variables_initializer().run()

    batch_size = 16
    tr_flag = 0    # start offset of the next training slice
    iter_task = 0  # last step index reached
    try:
        for i in range(FLAGS.max_steps):
            iter_task = i
            acc_total = 0
            for _t in range(FLAGS.T):
                summary_tr, _, acc_tmp = sess.run(
                    [merged, train_step, accuracy],
                    feed_dict={
                        x: tr_data[tr_flag:tr_flag + batch_size, :],
                        y_: tr_label[tr_flag:tr_flag + batch_size, :]
                    })
                # Wrap around the data set; the slice right before the
                # wrap may hold fewer than batch_size samples.
                tr_flag = (tr_flag + batch_size) % data_num_len
                acc_total += acc_tmp

            train_writer.add_summary(summary_tr, i)
            print('Training Accuracy at step %s: %s' %
                  (i, acc_total / FLAGS.T))
            if acc_total / FLAGS.T >= 0.998:
                print('Learning Done!!')
                print('Optimal Path is as followed.')
                break
        print("Entire Iter:" + str(iter_task))
    finally:
        train_writer.close()
        test_writer.close()


def _noisy_digit_pair(data, one_hot_labels, first, second):
    """Return a noisy copy of the samples of two digits and their 2-way labels.

    About a quarter of the pixels are forced to 0.0, another quarter to
    1.0, and the rest are left untouched.  ``second`` must equal
    ``first + 1`` for the label slice to yield exactly two columns.
    """
    mask = (one_hot_labels[:, first] == 1.0) | (one_hot_labels[:, second] == 1.0)
    pair_data = data[mask]  # boolean indexing copies, so `data` is untouched
    # One draw per pixel in row-major order, same thresholds as the former
    # per-pixel loop (u*2 < 0.5 -> 0.0, 0.5 <= u*2 < 1 -> 1.0).
    noise = np.random.rand(*pair_data.shape) * 2
    pair_data[noise < 0.5] = 0.0
    pair_data[(noise >= 0.5) & (noise < 1)] = 1.0
    pair_label = one_hot_labels[mask][:, first:second + 1]
    return pair_data, pair_label