def train(base_lr=1e-3, batch_sz=128, gpu_no=0):
    """Train the VGG model on CIFAR-10 and log train/test accuracy to text files.

    The training and test input pipelines come from ``distorted_inputs`` and
    ``inputs``; the ``is_training`` placeholder selects between them through
    ``tf.cond``.  The objective is ``loss2(..., 'c_entropy')`` plus any
    regularization losses registered in the graph, minimised with
    ``MomentumOptimizer`` for 42500 iterations.  The learning rate is
    multiplied by 0.1 at iterations 20000, 30000 and 37500.  After the loop
    a checkpoint is written and a final test pass is logged.

    Args:
        base_lr: Initial learning rate.
        batch_sz: Training batch size handed to ``distorted_inputs``.
        gpu_no: Non-negative integer.  It is only validated here; this
            function does not otherwise use it.

    Raises:
        AssertionError: If ``gpu_no`` is not a non-negative integer.
    """
    assert isinstance(gpu_no, int) and gpu_no >= 0

    def ensure_dir(path):
        # Create a single directory level if it is missing; return the path.
        if not os.path.exists(path):
            os.mkdir(path)
        return path

    root_path = os.path.dirname(os.path.realpath(__file__))
    folder_name = os.path.basename(root_path)

    log_path = ensure_dir(os.path.join(root_path, '../../log_cifar10'))
    log_path = ensure_dir(os.path.join(log_path, folder_name))
    save_path = ensure_dir(os.path.join(root_path, '../../model_cifar10'))
    save_path = ensure_dir(os.path.join(save_path, folder_name))

    train_log = os.path.join(log_path, 'log_train.txt')
    test_log = os.path.join(log_path, 'log_test.txt')

    n_class = 10
    batch_test = 100
    n_test = 10000
    max_iter = 42500
    momentum = 0.9
    lr = base_lr

    is_training = tf.placeholder("bool")

    data_path = os.path.join(root_path, '../../../data/cifar-10')
    tr_images, tr_labels = distorted_inputs(data_path, batch_sz)
    te_images, te_labels = inputs(True, data_path, batch_test)
    images, labels = tf.cond(is_training,
                             lambda: [tr_images, tr_labels],
                             lambda: [te_images, te_labels])

    vgg = VGG()
    vgg.build(images, n_class, is_training)

    fit_loss = loss2(vgg.score, labels, n_class, 'c_entropy')
    loss_op = fit_loss
    reg_loss_list = tf.losses.get_regularization_losses()
    if reg_loss_list:
        reg_loss = tf.add_n(reg_loss_list)
        loss_op = loss_op + reg_loss
    else:
        # reg_loss is fetched on every training step, so it must always be
        # bound even when the graph registers no regularization losses.
        reg_loss = tf.constant(0.0)

    lr_ = tf.placeholder("float")
    update_op = tf.train.MomentumOptimizer(lr_, momentum).minimize(loss_op)
    acc_op = tf.reduce_mean(
        tf.to_float(tf.equal(labels, tf.to_int32(vgg.pred))))

    def append_line(path, line):
        # Echo a log line to stdout and append it to the given log file.
        print(line)
        with open(path, 'a') as log_file:
            log_file.write(line + '\n')

    sess = tf.Session()
    coord = tf.train.Coordinator()
    threads = []

    def evaluate(step):
        # Average acc_op over n_test / batch_test test batches and log it.
        total = 0.0
        for _ in range(n_test // batch_test):
            total = total + sess.run(acc_op, {is_training: False})
        test_acc = total * batch_test / float(n_test)
        append_line(test_log,
                    '++++iteration_%d: test acc=%.4f' % (step, test_acc))

    try:
        sess.run(tf.global_variables_initializer())
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        print("")
        print("====================")
        print("Log will be saved to: " + log_path)
        print("")

        # Truncate (or create) both log files before appending to them.
        with open(test_log, 'w'):
            pass
        with open(train_log, 'w'):
            pass

        for step in range(max_iter):
            if step in (20000, 30000, 37500):
                lr *= 0.1

            fit, reg, acc, _ = sess.run(
                [fit_loss, reg_loss, acc_op, update_op],
                {lr_: lr, is_training: True})

            if step % 100 == 0 and step != 0:
                append_line(
                    train_log,
                    '====iteration_%d: fit=%.4f, reg=%.4f, acc=%.4f'
                    % (step, fit, reg, acc))

            if step % 500 == 0 and step != 0:
                evaluate(step)

        # `step` still holds the last iteration index (max_iter - 1) here.
        tf.train.Saver().save(sess, os.path.join(save_path, str(step)))
        evaluate(step)
    finally:
        # Stop the input queue threads and release the session even when
        # training is interrupted.
        try:
            coord.request_stop()
            coord.join(threads)
        finally:
            sess.close()
def train(base_lr, batch_sz, gpu_no, iters):
    """Train VGG with alternating updates of 'poles' and remaining variables.

    Trainable variables and regularization losses are split by whether their
    name contains ``'poles'``.  On every iteration the pole variables receive
    ``iters`` momentum steps on ``thomson_loss + thomson_final`` plus the pole
    regularizers.  The remaining variables then receive one momentum step on
    the fit loss plus their regularizers, with both Thomson terms negated.

    Batches are read from the module-level ``train_data`` / ``test_data``
    through ``ip.load_train`` / ``ip.load_test``.  ``train_data`` is
    reshuffled in place every 390 iterations.  Training runs for 75000
    iterations; the learning rate is multiplied by 0.1 at iterations 30000,
    50000 and 65000.  Logs are written to
    ``log/log_{train,test}_NN.txt``, where ``NN`` is the first index in
    00-99 without an existing test log.

    Args:
        base_lr: Initial learning rate.
        batch_sz: Training batch size passed to ``ip.load_train``.
        gpu_no: Value for ``CUDA_VISIBLE_DEVICES`` (a string, or an int
            which is converted with ``str``).
        iters: Number of pole-update steps per training iteration.

    Raises:
        AssertionError: If log indices 00-99 are all in use.
    """
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    # os.environ only accepts strings; str() also allows an integer index.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_no)

    root_path = os.path.dirname(os.path.realpath(__file__))
    log_path = create_dir(os.path.join(root_path, 'log'))

    # Find the first run index whose test log does not exist yet.
    acc_count = 0
    while acc_count < 100 and os.path.exists(
            os.path.join(log_path, 'log_test_%02d.txt' % acc_count)):
        acc_count += 1
    assert acc_count < 100
    train_log = os.path.join(log_path, 'log_train_%02d.txt' % acc_count)
    test_log = os.path.join(log_path, 'log_test_%02d.txt' % acc_count)

    n_class = 100
    batch_test = 100
    n_test = 10000
    n_train = 50000
    # Hard-coded reshuffle period; 50000 // 128 == 390, so this presumably
    # assumes batch_sz == 128 -- TODO confirm for other batch sizes.
    steps_per_epoch = 390
    max_iter = 75000
    momentum = 0.9
    lr = base_lr

    is_training = tf.placeholder("bool")
    lr_ = tf.placeholder("float")
    images = tf.placeholder(tf.float32, (None, 32, 32, 3))
    labels = tf.placeholder(tf.int32, (None))

    vgg = VGG()
    vgg.build(images, n_class, is_training, lr_)

    acc_op = tf.reduce_mean(
        tf.to_float(tf.equal(labels, tf.to_int32(vgg.pred))))
    fit_loss = loss2(vgg.score, labels, n_class, 'c_entropy')

    def sum_or_zero(tensors):
        # tf.add_n rejects an empty list; fall back to a zero constant.
        return tf.add_n(tensors) if tensors else tf.constant(0.0)

    allreg = tf.losses.get_regularization_losses()
    reg_loss_list = [t for t in allreg if 'poles' not in t.name]
    vreg_loss_list = [t for t in allreg if 'poles' in t.name]
    thom_loss_list = tf.get_collection('thomson_loss')
    thom_final_list = tf.get_collection('thomson_final')

    reg_loss = sum_or_zero(reg_loss_list)
    thom_loss = sum_or_zero(thom_loss_list)
    thom_final = sum_or_zero(thom_final_list)
    vreg_loss = sum_or_zero(vreg_loss_list)

    # The Thomson terms enter the network objective negated, while the pole
    # objective uses them with their original sign.
    loss_op = fit_loss + reg_loss + (-thom_loss) + (-thom_final)
    vloss_op = thom_loss + thom_final + vreg_loss

    allvars = tf.trainable_variables()
    normal_vars = [v for v in allvars if 'poles' not in v.name]
    v_vars = [v for v in allvars if 'poles' in v.name]

    updatev_op = tf.train.MomentumOptimizer(lr_, momentum).minimize(
        vloss_op, var_list=v_vars)
    update_op = tf.train.MomentumOptimizer(lr_, momentum).minimize(
        loss_op, var_list=normal_vars)

    def append_line(path, line):
        # Echo a log line to stdout and append it to the given log file.
        print(line)
        with open(path, 'a') as log_file:
            log_file.write(line + '\n')

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        def evaluate(step):
            # Average acc_op over the test batches and log the result.
            total = 0.0
            for j in range(n_test // batch_test):
                te_images, te_labels = ip.load_test(
                    test_data, batch_test, j)
                total = total + sess.run(acc_op, {
                    is_training: False,
                    images: te_images,
                    labels: te_labels
                })
            test_acc = total * batch_test / float(n_test)
            append_line(test_log,
                        '++++iter_%d: test acc=%.4f' % (step, test_acc))

        try:
            tf.summary.scalar('fit loss', fit_loss)
            if reg_loss_list:
                tf.summary.scalar('reg loss', reg_loss)
            if thom_loss_list:
                tf.summary.scalar('thomson loss', thom_loss)
            if thom_final_list:
                tf.summary.scalar('thomson final loss', thom_final)
            # Summarise the fed placeholder, not the Python float, so the
            # recorded value follows the learning-rate decay.
            tf.summary.scalar('learning rate', lr_)
            tf.summary.scalar('accuracy', acc_op)
            merged = tf.summary.merge_all()

            print("====================")
            print("Log will be saved to: " + log_path)

            # Truncate (or create) both log files before appending to them.
            with open(train_log, 'w'):
                pass
            with open(test_log, 'w'):
                pass

            for step in range(max_iter):
                batch_idx = step % steps_per_epoch
                if batch_idx == 0:
                    # Reshuffle the module-level training set in place.
                    idx = np.arange(0, n_train)
                    np.random.shuffle(idx)
                    train_data['data'] = train_data['data'][idx]
                    train_data['fine_labels'] = np.reshape(
                        train_data['fine_labels'], [n_train])
                    train_data['fine_labels'] = \
                        train_data['fine_labels'][idx]
                tr_images, tr_labels = ip.load_train(
                    train_data, batch_sz, batch_idx)

                if step in (30000, 50000, 65000):
                    lr *= 0.1

                feed = {
                    lr_: lr,
                    is_training: True,
                    images: tr_images,
                    labels: tr_labels
                }

                # Pole variables are updated `iters` times on this batch
                # before the single update of the remaining variables.
                for _ in range(iters):
                    sess.run(updatev_op, feed)

                # The merged summary is evaluated but not written anywhere
                # in this function.
                _summary, fit, reg, thom, thomf, acc, _ = sess.run([
                    merged, fit_loss, reg_loss, thom_loss, thom_final,
                    acc_op, update_op
                ], feed)

                if step % 100 == 0:
                    append_line(
                        train_log,
                        '====iter_%d: fit=%.4f, reg=%.4f, thom=%.4f, thomf=%.4f, acc=%.4f'
                        % (step, fit, reg, thom, thomf, acc))

                if step % 500 == 0 and step != 0:
                    evaluate(step)

            # `step` still holds the last iteration index (max_iter - 1).
            evaluate(step)
        finally:
            coord.request_stop()
            coord.join(threads)
def train(base_lr=1e-3, batch_sz=128, gpu_no=0):
    """Train the VGG model on CIFAR-100 with Adam and periodic weight assignment.

    The training and test input pipelines come from ``distorted_inputs`` and
    ``inputs``; the ``is_training`` placeholder selects between them through
    ``tf.cond``.  The objective is ``loss2(..., 'c_entropy')`` plus any
    registered regularization losses and any tensors in the
    ``'orth_constraint'`` collection.  Every 100 iterations each trainable
    variable whose name is a key of ``vgg.wp_dict`` is overwritten with the
    corresponding ``vgg.wp_dict`` value.

    Training runs for 42500 iterations; the learning rate is multiplied by
    0.1 at iterations 20000, 30000 and 37500.  Checkpoints are saved every
    10000 iterations and once after the loop, followed by a final test pass.

    Args:
        base_lr: Initial learning rate.
        batch_sz: Training batch size handed to ``distorted_inputs``.
        gpu_no: Non-negative integer.  It is only validated here; this
            function does not otherwise use it.

    Raises:
        AssertionError: If ``gpu_no`` is not a non-negative integer.
    """
    assert isinstance(gpu_no, int) and gpu_no >= 0

    def ensure_dir(path):
        # Create a single directory level if it is missing; return the path.
        if not os.path.exists(path):
            os.mkdir(path)
        return path

    root_path = os.path.dirname(os.path.realpath(__file__))
    folder_name = os.path.basename(root_path)

    log_path = ensure_dir(os.path.join(root_path, '../../log'))
    log_path = ensure_dir(os.path.join(log_path, folder_name))
    save_path = ensure_dir(os.path.join(root_path, '../../model'))
    save_path = ensure_dir(os.path.join(save_path, folder_name))

    train_log = os.path.join(log_path, 'log_train.txt')
    test_log = os.path.join(log_path, 'log_test.txt')

    n_class = 100
    batch_test = 100
    n_test = 10000
    max_iter = 42500
    lr = base_lr

    is_training = tf.placeholder("bool")

    data_path = os.path.join(root_path, '../../cifar-100-binary')
    tr_images, tr_labels = distorted_inputs(data_path, batch_sz)
    te_images, te_labels = inputs(True, data_path, batch_test)
    images, labels = tf.cond(is_training,
                             lambda: [tr_images, tr_labels],
                             lambda: [te_images, te_labels])

    vgg = VGG()
    vgg.build(images, n_class, is_training)

    fit_loss = loss2(vgg.score, labels, n_class, 'c_entropy')
    loss_op = fit_loss

    reg_loss_list = tf.losses.get_regularization_losses()
    if reg_loss_list:
        reg_loss = tf.add_n(reg_loss_list)
        loss_op = loss_op + reg_loss
    else:
        # reg_loss is fetched on every training step, so it must always be
        # bound even when the graph registers no regularization losses.
        reg_loss = tf.constant(0.0)

    orth_loss_list = tf.get_collection('orth_constraint')
    has_orth = bool(orth_loss_list)
    if has_orth:
        orth_loss = tf.add_n(orth_loss_list)
        loss_op = loss_op + orth_loss

    lr_ = tf.placeholder("float")

    # One assign op per trainable variable that has an entry in vgg.wp_dict.
    wp_names = set(vgg.wp_dict.keys())
    wp_op = tf.group(*[
        tf.assign(v, vgg.wp_dict[v.name])
        for v in tf.trainable_variables() if v.name in wp_names
    ])

    update_op = tf.train.AdamOptimizer(lr_).minimize(loss_op)
    acc_op = tf.reduce_mean(
        tf.to_float(tf.equal(labels, tf.to_int32(vgg.pred))))

    def append_line(path, line):
        # Echo a log line to stdout and append it to the given log file.
        print(line)
        with open(path, 'a') as log_file:
            log_file.write(line + '\n')

    sess = tf.Session()
    coord = tf.train.Coordinator()
    threads = []

    def evaluate(step):
        # Average acc_op over n_test / batch_test test batches and log it.
        total = 0.0
        for _ in range(n_test // batch_test):
            total = total + sess.run(acc_op, {is_training: False})
        test_acc = total * batch_test / float(n_test)
        append_line(test_log,
                    '++++iteration_%d: test acc=%.4f' % (step, test_acc))

    try:
        sess.run(tf.global_variables_initializer())
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # print() with a single argument behaves the same on Python 2 and 3.
        print("")
        print("====================")
        print("Log will be saved to: " + log_path)
        print("")

        # Truncate (or create) both log files before appending to them.
        with open(test_log, 'w'):
            pass
        with open(train_log, 'w'):
            pass

        for step in range(max_iter):
            if step in (20000, 30000, 37500):
                lr *= 0.1

            log_step = step % 100 == 0 and step != 0
            feed = {lr_: lr, is_training: True}

            if has_orth:
                fit, reg, orth, acc, _ = sess.run(
                    [fit_loss, reg_loss, orth_loss, acc_op, update_op],
                    feed)
                if log_step:
                    append_line(
                        train_log,
                        '====iteration_%d: fit=%.4f, reg=%.4f, orth=%.4f, acc=%.4f'
                        % (step, fit, reg, orth, acc))
            else:
                fit, reg, acc, _ = sess.run(
                    [fit_loss, reg_loss, acc_op, update_op], feed)
                if log_step:
                    append_line(
                        train_log,
                        '====iteration_%d: fit=%.4f, reg=%.4f, acc=%.4f'
                        % (step, fit, reg, acc))

            if log_step:
                # Overwrite the selected weights with their wp_dict values.
                sess.run(wp_op, {lr_: 0.0, is_training: False})

            if step % 500 == 0 and step != 0:
                evaluate(step)

            if step % 10000 == 0 and step != 0:
                tf.train.Saver().save(sess,
                                      os.path.join(save_path, str(step)))

        # `step` still holds the last iteration index (max_iter - 1) here.
        tf.train.Saver().save(sess, os.path.join(save_path, str(step)))
        evaluate(step)
    finally:
        # Stop the input queue threads and release the session even when
        # training is interrupted.
        try:
            coord.request_stop()
            coord.join(threads)
        finally:
            sess.close()