def main() -> None:
    """CLI entry point: read the parameters-file path and kick off training.

    Relies on module-level constants LEARNING_RATE, GAMMA and BATCH_SIZE
    and on the module-level train() function.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('parameters', metavar='parameters')
    cli_args = parser.parse_args()
    train(lr=LEARNING_RATE,
          gamma=GAMMA,
          batch_size=BATCH_SIZE,
          parameters_file_name=cli_args.parameters)
def train():
    """Train CGCNN for a number of steps.

    Builds the TF1 graph (input pipeline, inference, loss, train op) and runs
    it under a MonitoredTrainingSession until FLAGS.max_steps global steps.
    Checkpoints/summaries go to FLAGS.train_dir.  Relies on module-level
    `cnn`, `FLAGS`, `time` and `datetime`.
    """
    with tf.Graph().as_default():
        global_step = tf.train.get_or_create_global_step()
        # Get data for training
        # Force input pipeline to CPU:0 to avoid operations sometimes ending up on
        # GPU and resulting in a slow down.
        with tf.device('/cpu:0'):
            energies, sites_matrices, adj_matrices = cnn.inputs(eval_data=False)
        # Build a Graph that computes the energy predictions from the
        # inference model.
        energies_hat = cnn.inference(sites_matrices, adj_matrices)
        # Calculate loss.
        loss = cnn.loss(energies_hat, energies)
        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = cnn.train(loss, global_step)

        class _LoggerHook(tf.train.SessionRunHook):
            """Logs loss and runtime every FLAGS.log_frequency steps."""

            def begin(self):
                # -1 so the first before_run() call makes this step 0.
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                return tf.train.SessionRunArgs(loss)  # Asks for loss value.

            def after_run(self, run_context, run_values):
                if self._step % FLAGS.log_frequency == 0:
                    current_time = time.time()
                    duration = current_time - self._start_time
                    self._start_time = current_time
                    loss_value = run_values.results
                    # Throughput over the last log window.
                    examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
                    sec_per_batch = float(duration / FLAGS.log_frequency)
                    format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                                  'sec/batch)')
                    print(format_str % (datetime.now(), self._step, loss_value,
                                        examples_per_sec, sec_per_batch))

        # MonitoredTrainingSession handles checkpointing, summaries and
        # restart-from-checkpoint; StopAtStepHook ends the loop, NanTensorHook
        # aborts on a NaN loss.
        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                hooks=[
                    tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
                    tf.train.NanTensorHook(loss),
                    _LoggerHook()
                ],
                config=tf.ConfigProto(log_device_placement=FLAGS.log_device_placement)) as mon_sess:
            while not mon_sess.should_stop():
                mon_sess.run(train_op)
def train():
    """Build the CNN training graph and optimize for FLAGS.max_steps steps.

    Logs the mean loss once per epoch (465 batches) and saves a checkpoint
    periodically to FLAGS.checkpoint_dir.  Relies on module-level `cnn`,
    `read_record`, `FLAGS`, `np`, `os`, `time` and `datetime`.
    """
    print(1)  # coarse progress marker left over from debugging
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)
        images, labels = read_record.read_and_decode(FLAGS.data_dir + '/train.tfrecords')
        image_batch, label_batch = cnn.inputs(images, labels, FLAGS.batch_size)
        logits = cnn.cnn_model(image_batch)
        loss = cnn.loss(logits, label_batch)
        train_op = cnn.train(loss, global_step, FLAGS.batch_size)
        saver = tf.train.Saver(tf.global_variables())
        summary_op = tf.summary.merge_all()
        init = tf.global_variables_initializer()
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)
        tf.train.start_queue_runners(sess=sess)
        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)
        loss_list = []
        # BUG FIX: the original used Python-2-only `xrange` together with
        # Python-3-only `list.clear()` below, so it could not run under
        # either interpreter.  `range` works everywhere this code can run.
        for step in range(FLAGS.max_steps):
            start_time = time.time()
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time
            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
            loss_list.append(loss_value)
            if step % 465 == 0:  # 465 batches == one epoch for this dataset
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = 0  # num_examples_per_step / duration
                sec_per_batch = float(duration)
                average_loss_value = np.mean(loss_list)
                # total_loss_list.append(average_loss_value)
                loss_list.clear()
                format_str = ('%s: epoch %d, loss = %.4f (%.1f examples/sec; %.3f '
                              'sec/batch)')
                print(format_str % (datetime.now(), step / 465, average_loss_value,
                                    examples_per_sec, sec_per_batch))
            # NOTE(review): 465*30+1 == 13951, so this fires at step 0 and then
            # only every 13951 steps -- confirm the intended checkpoint cadence.
            if step % (465 * 30 + 1) == 0:
                checkpoint_path = os.path.join(FLAGS.checkpoint_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
def main(_):
    """Entry point: train the face model when no checkpoint exists,
    otherwise run recognition from the camera."""
    savepath = './checkpoint/face.ckpt'
    # A missing .meta file means no saved model -- training is required.
    isneedtrain = not os.path.exists(savepath + '.meta')
    if not isneedtrain:
        testfromcamera(savepath)
        return
    # First generate all face crops from the raw images.
    log.debug('generateface')
    generateface([['./image/trainfaces', './image/trainimages']])
    pathlabelpair, indextoname = getfileandlabel('./image/trainimages')
    train_x, train_y = readimage(pathlabelpair)
    train_x = train_x.astype(np.float32) / 255.0
    log.debug('len of train_x : %s', train_x.shape)
    myconv.train(train_x, train_y, savepath)
    log.debug('training is over, please run again')
def experiment(dl_params, model_params, train_params, train_model=False):
    """Run one DenseNet experiment: load data, train (or load) the model,
    evaluate on the test split, and print a report.

    Args:
        dl_params: parameters forwarded to Dataloader.
        model_params: DenseNet construction parameters; must contain
            'load_location' when train_model is False.
        train_params: training/report parameters forwarded to train() and
            create_test_report().
        train_model: True -> train from scratch; False -> load a saved model;
            any other value -> use the untrained model as-is.
    """
    # create data
    print("Loading data...", flush=True)
    dataloader = Dataloader(dl_params, rseed=0)
    X_train, y_train = dataloader.get_dataset("train")
    X_valid, y_valid = dataloader.get_dataset("valid")
    X_test, y_test = dataloader.get_dataset("test")
    del dataloader  # save some memory
    # convert to np.array
    X_train = np.stack(X_train, axis=0)
    X_valid = np.stack(X_valid, axis=0)
    X_test = np.stack(X_test, axis=0)
    y_train = np.asarray(y_train)
    y_valid = np.asarray(y_valid)
    y_test = np.asarray(y_test)
    # normalize to between 0 and 1
    X_train = X_train.astype("float") / 255.0
    X_valid = X_valid.astype("float") / 255.0
    X_test = X_test.astype("float") / 255.0
    # convert labels to 1-hot vector
    binarizer = LabelBinarizer()
    y_train = binarizer.fit_transform(y_train)
    # BUG FIX: fit the binarizer only on the training labels.  Re-fitting on
    # the validation/test splits can yield a different class ordering (and is
    # a form of leakage); transform() reuses the mapping learned from train.
    y_valid = binarizer.transform(y_valid)
    y_test = binarizer.transform(y_test)
    print("Building classifier...")
    # need to add our own "top" FC to make classes=2
    clf = DenseNet(model_params)
    if train_model is True:
        print("Training classifier...")
        clf.model = train(train_params, clf.model, X_train, y_train, X_valid, y_valid)
    elif train_model is False:
        clf.model = load_model(model_params['load_location'])
    else:
        pass  # use untrained model
    del X_train, X_valid, y_train, y_valid  # save memory
    print("Testing classifier...")
    y_pred = clf.model.predict(X_test)
    test_report = create_test_report(train_params, y_test, y_pred)
    print(test_report)
    keras.backend.clear_session()
    print("Experiment completed.")
    print("Session ended.")
def main(_):
    """Entry point: train the face model (with a validation set) when no
    checkpoint exists, otherwise do nothing.

    Relies on module-level globals `savepath` and `batch_size`.
    """
    isneedtrain = False  # whether training is needed
    # If the .meta file does not exist, a trained model is missing.
    if os.path.exists(savepath + '.meta') is False:
        isneedtrain = True
    if isneedtrain:
        # Generate cropped face images from the raw training images.
        generateface([['./image/trainimages', './image/trainfaces']])
        # Get (file path, label) pairs for the training set.
        pathlabelpair, indextoname = getfileandlabel('./image/trainfaces')
        # Get (file path, label) pairs for the validation set.
        v_pathlabelpair, v_indextoname = getfileandlabel('./image/validate')
        # Read images: training inputs and targets.
        train_x, train_y = readimage(pathlabelpair)
        # Read images: validation inputs and targets.
        v_train_x, v_train_y = readimage(v_pathlabelpair)
        # train_x = train_x.astype(np.float32) / 255.0
        train_x = train_x.astype(np.float32)
        # BUG FIX: the original passed the format string and the value as two
        # separate print() arguments, so the literal '%s' was printed instead
        # of the shape.  Format with % so the shape is interpolated.
        print('训练集大小 : %s' % (train_x.shape,))
        myconv.train(train_x, train_y, v_train_x, v_train_y, savepath, batch_size)
    else:
        # testfromcamera(savepath)
        pass
def main(args=None):
    """Wipe FLAGS.train_dir, build the network graph, and train for 300 steps
    with queue-runner input, writing summaries and a checkpoint each step.

    Relies on module-level `network`, `FLAGS`, `tf` and `os`.
    """
    if tf.gfile.Exists(FLAGS.train_dir):
        tf.gfile.DeleteRecursively(FLAGS.train_dir)
    tf.gfile.MakeDirs(FLAGS.train_dir)
    with tf.Graph().as_default():
        images, labels = network.train_set()
        logits = network.inference(images)
        loss = network.loss(logits, labels)
        train = network.train(loss, 0.01)
        summary = tf.merge_all_summaries()
        init = tf.initialize_all_variables()
        saver = tf.train.Saver()
        with tf.Session() as sess:
            summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)
            sess.run(init)
            # Start the input-queue threads under a coordinator so we can
            # shut them down cleanly in the finally block.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)
            try:
                for step in range(300):
                    if not coord.should_stop():
                        _, loss_value = sess.run([train, loss])
                        # BUG FIX: Python-2 `print` statements replaced with the
                        # print() function, consistent with the rest of the file
                        # (and valid on both Python 2 and 3 for these calls).
                        print('Step %d: loss = %.2f' % (step, loss_value))
                        summary_str = sess.run(summary)
                        summary_writer.add_summary(summary_str, step)
                        summary_writer.flush()
                        # NOTE(review): checkpointing every step is expensive --
                        # confirm this cadence is intentional.
                        checkpoint_file = os.path.join(FLAGS.train_dir, 'checkpoint')
                        saver.save(sess, checkpoint_file, global_step=step)
            except tf.errors.OutOfRangeError:
                print('Done training -- epoch limit reached')
            finally:
                coord.request_stop()
                coord.join(threads)
def run_training():
    """Train the CNN with a feed-dict loop, logging loss every 100 steps and
    running train/validation/test evaluation every 1000 steps.

    Relies on module-level `input_data`, `nn_structure`, `place_holder`,
    `fill_feed_dict`, `do_eval`, `FLAGS` and the conv/pool parameter globals.
    """
    # for mnist
    # train_data, test_data, validation_data = input_data.read_data_sets("../data/MNIST_data/")
    # for cifar-10
    train_data, test_data, validation_data = input_data.load_data()
    with tf.Graph().as_default():
        # Placeholders for images, labels and the dropout keep-probability.
        image_pl, label_pl, keep_prob_pl = place_holder(FLAGS.batch_size)
        logits = nn_structure.inference(image_pl, conv_1_params, max_pool_1_params,
                                        conv_2_params, max_pool_2_params,
                                        full_connected_units, keep_prob_pl)
        loss = nn_structure.loss(logits, label_pl)
        train_op = nn_structure.train(loss, FLAGS.learning_rate)
        # Count of top-1 correct predictions for evaluation.
        eval_correct = nn_structure.evaluation(logits, label_pl, k=1)
        init = tf.initialize_all_variables()
        with tf.Session() as sess:
            sess.run(init)
            start_time = time.time()
            for step in range(FLAGS.max_step):
                # 0.5 is the dropout keep-probability used during training.
                feed_dict = fill_feed_dict(train_data, 0.5, image_pl, label_pl,
                                           keep_prob_pl)
                _, loss_value = sess.run([train_op, loss], feed_dict)
                if step % 100 == 0:
                    duration = time.time() - start_time
                    # duration covers 100 steps, so ms/step = duration/100*1000
                    # = duration * 10.
                    print("Step: {:d}, Training Loss: {:.4f}, {:.1f}ms/step".
                          format(step, loss_value, duration * 10))
                    start_time = time.time()
                if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_step:
                    print("Train Eval:")
                    do_eval(sess, eval_correct, train_data, image_pl, label_pl,
                            keep_prob_pl)
                    print("Validation Eval:")
                    do_eval(sess, eval_correct, validation_data, image_pl,
                            label_pl, keep_prob_pl)
                    print("Test Eval:")
                    do_eval(sess, eval_correct, test_data, image_pl, label_pl,
                            keep_prob_pl)
def main():
    """Train a CNN on the NIST digits, then for each class gradient-ascend a
    blank canvas until the model assigns it probability ~1 for that class.

    Relies on module-level `nist`, `cnn` and `canvas`.
    """
    np.random.seed(0)
    X, y = nist.dataset()
    epsilon = 1e-4
    learning_rate = 0.01
    # Train with NIST dataset
    Model = cnn.train(X, y, nist.get_model())
    for i in range(10):
        X = canvas()
        # One-hot target for class i.
        # NOTE(review): Y is never passed to forward/backward -- confirm
        # whether the backward pass is supposed to receive it.
        Y = np.zeros(10)
        Y[i] = 1
        predicted_prob = 0
        while 1 - predicted_prob > epsilon:
            # BUG FIX: `false` is not a Python name -- it raised NameError.
            yhat = Model.forward(X, is_train=False)
            predicted_prob = yhat[i]
            dx = Model.backward()
            # UGH! Need to vectorize code before doing this. Otherwise weights keep accumulating
            # Use the named learning_rate instead of re-hard-coding 0.01.
            X -= dx * learning_rate
        # BUG FIX: Python-2 `print X` statement -> print() function,
        # consistent with the rest of the file.
        print(X)
def experiment(dl_params, model_params, train_params, train_model=False):
    """Run one CNN experiment: save the parameter dicts, load and prepare the
    data (including grayscale->RGB for LIME), train or load the model,
    evaluate on the test split, and print a report.

    Args:
        dl_params: parameters forwarded to Dataloader.
        model_params: CNN construction parameters; 'load_location' is used
            when loading a saved model.
        train_params: training/report parameters; 'report_dir' is where the
            parameter dumps are written.
        train_model: True -> train; False -> load from 'load_location';
            any other value -> use the untrained model as-is.
    """
    keras.backend.clear_session()
    # use gpu
    config = tf.ConfigProto(log_device_placement=True)
    sess = tf.Session(config=config)
    keras.backend.set_session(sess)
    # save parameters
    print("Saving parameters...")
    save_dir = train_params['report_dir']
    if os.path.exists(save_dir) is False:
        os.makedirs(save_dir)
    # Dump each parameter dict to a text file for reproducibility.
    for fname, params in (("model_params.txt", model_params),
                          ("train_params.txt", train_params),
                          ("dl_params.txt", dl_params)):
        with open(save_dir + fname, "w+") as params_file:
            params_file.write(str(params))
    # create data
    print("Loading data...", flush=True)
    dataloader = Dataloader(dl_params, rseed=0)
    X_train, y_train = dataloader.get_dataset_images_and_labels("train")
    X_valid, y_valid = dataloader.get_dataset_images_and_labels("valid")
    X_test, y_test = dataloader.get_dataset_images_and_labels("test")
    # dataloader.print_dataset_files(save_dir=save_dir)
    del dataloader  # save some memory
    # convert to np.array
    X_train = np.stack(X_train, axis=0)
    X_valid = np.stack(X_valid, axis=0)
    X_test = np.stack(X_test, axis=0)
    y_train = np.asarray(y_train)
    y_valid = np.asarray(y_valid)
    y_test = np.asarray(y_test)
    # normalize to between 0 and 1
    X_train = X_train.astype("float") / 255.0
    X_valid = X_valid.astype("float") / 255.0
    X_test = X_test.astype("float") / 255.0
    # convert labels to 1-hot vector
    binarizer = LabelBinarizer()
    y_train = binarizer.fit_transform(y_train)
    # BUG FIX: fit the binarizer only on the training labels.  Re-fitting on
    # the validation/test splits can yield a different class ordering (and is
    # a form of leakage); transform() reuses the mapping learned from train.
    y_valid = binarizer.transform(y_valid)
    y_test = binarizer.transform(y_test)
    # convert from grayscale to rgb image (LIME requires this...annoying af)
    if X_train.shape[-1] == 1:  # if grayscale
        print("Converting from grayscale to RGB...")
        X_train = gray2rgb(X_train.squeeze(axis=-1))
        X_valid = gray2rgb(X_valid.squeeze(axis=-1))
        X_test = gray2rgb(X_test.squeeze(axis=-1))
    print("Building classifier...")
    # need to add our own "top" FC to make classes=2
    clf = CNN(model_params)
    if train_model is True or model_params['load_location'] == "":
        print("Training classifier...")
        clf.model = train(train_params, clf.model, X_train, y_train, X_valid, y_valid)
    elif train_model is False:
        clf.model = load_model(model_params['load_location'])
    else:
        pass  # use untrained model
    del X_train, X_valid, y_train, y_valid  # save memory
    print("Testing classifier...")
    y_pred = clf.model.predict(X_test)
    test_report = create_test_report(train_params, y_test, y_pred)
    print(test_report)
    sess.close()
    keras.backend.clear_session()
    print("Experiment completed.")
    print("Session ended.")
import cnn
import netloader
'''
Set up the data and start training or testing.
'''
# FIX: read the config through a context manager so the handle is always
# closed, and strip lines with a comprehension instead of an index loop.
with open('params.cfg', 'r') as cfg:
    params = [line.strip() for line in cfg]
nl = netloader.NetLoader(params[0], params[1], params[2], params[7])
cnn.train(int(params[3]), int(params[4]), params[5], nl, reload=params[6])
#cnn.run_validate(nl)
def train(model_config, sess, seed, repeat_state, data_split=None):
    """Train/evaluate one GCN-MLP configuration and return its test metrics.

    Dispatches on model_config['Model'] to one of many label/graph
    augmentation strategies (Model1..Model28), builds the TF graph, trains
    with optional validation-based model selection and early stopping, and
    returns (test_acc, test_acc_of_class, prediction, size_of_each_class,
    elapsed) -- except for a few Model branches that return early with
    shorter tuples.

    NOTE(review): `args` (used for `args.verbose`) is a module-level global,
    not a parameter.  `size_of_each_class` is only bound when data is loaded
    here (data_split is None) -- the final return would raise otherwise;
    confirm callers never rely on that path.
    """
    # Print model_config
    very_begining = time.time()
    print('',
          'name : {}'.format(model_config['name']),
          'logdir : {}'.format(model_config['logdir']),
          'dataset : {}'.format(model_config['dataset']),
          'train_size : {}'.format(model_config['train_size']),
          'learning_rate : {}'.format(model_config['learning_rate']),
          'feature : {}'.format(model_config['feature']),
          'logging : {}'.format(model_config['logging']),
          sep='\n')
    if data_split:
        # Reuse a pre-computed split (recursive calls pass this through).
        adj = data_split['adj']
        features = data_split['features']
        y_train = data_split['y_train']
        y_val = data_split['y_val']
        y_test = data_split['y_test']
        train_mask = data_split['train_mask']
        val_mask = data_split['val_mask']
        test_mask = data_split['test_mask']
        triplet = data_split['triplet']
    else:
        # Load data
        adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask, size_of_each_class, triplet = \
            load_data(model_config['dataset'], train_size=model_config['train_size'],
                      validation_size=model_config['validation_size'],
                      model_config=model_config, shuffle=model_config['shuffle'],
                      repeat_state=repeat_state)
        # stored_A is a cache key prefix for precomputed absorption matrices.
        stored_A = model_config['dataset']
        if model_config['drop_inter_class_edge']:
            adj = drop_inter_class_edge(adj)
            stored_A = model_config['dataset'] + '_drop'
        # preprocess_features
        begin = time.time()
        features = smooth(features, adj, model_config['smoothing'],
                          model_config, stored_A=stored_A + '_A_I')
        print(time.time() - begin, 's')
        data_split = {
            'adj': adj,
            'features': features,
            'y_train': y_train,
            'y_val': y_val,
            'y_test': y_test,
            'train_mask': train_mask,
            'val_mask': val_mask,
            'test_mask': test_mask,
            'triplet': triplet
        }
    # Graph Laplacian L = D - A, stored as a float32 COO matrix.
    laplacian = sparse.diags(adj.sum(1).flat, 0) - adj
    laplacian = laplacian.astype(np.float32).tocoo()
    if type(model_config['t']) == int and model_config['t'] < 0:
        # Negative t: derive a per-class t from average degree and class sizes.
        eta = adj.shape[0] / (adj.sum() / adj.shape[0])**len(
            model_config['connection'])
        model_config['t'] = (y_train.sum(axis=0) * 3 * eta /
                             y_train.sum()).astype(np.int64)
        print('t=', model_config['t'])
    # origin_adj = adj
    # --- Model dispatch: each branch augments adj / labels / features. ---
    if model_config['Model'] == 0:
        pass
    elif model_config['Model'] in [1, 2, 3, 4]:
        # absorption probability
        print(
            'Calculating Absorption Probability...',
            # 's :{}'.format(model_config['s']),
            'alpha :{}'.format(model_config['alpha']),
            'type :{}'.format(model_config['absorption_type']),
            sep='\n')
        if model_config['Model'] == 1:
            adj = Model1(adj, model_config['t'], model_config['alpha'],
                         model_config['absorption_type'])
        elif model_config['Model'] == 2:
            adj = Model2(adj, model_config['s'], model_config['alpha'], y_train)
        elif model_config['Model'] == 3:
            # original_y_train = y_train
            y_train, train_mask = Model3(adj, model_config['s'],
                                         model_config['alpha'], y_train,
                                         train_mask)
        elif model_config['Model'] == 4:
            y_train, train_mask = Model4(adj, model_config['s'],
                                         model_config['alpha'], y_train,
                                         train_mask)
    elif model_config['Model'] == 5:
        adj = Model5(features, adj, model_config['mu'])
    elif model_config['Model'] == 6:
        adj = Model6(adj)
    elif model_config['Model'] == 7:
        y_train, train_mask = Model7(adj, model_config['s'],
                                     model_config['alpha'], y_train,
                                     train_mask, features)
    elif model_config['Model'] == 8:
        # original_y_train = y_train
        y_train, train_mask = Model8(adj, model_config['s'],
                                     model_config['alpha'], y_train,
                                     train_mask)
    elif model_config['Model'] == 9:
        y_train, train_mask = Model9(adj, model_config['t'],
                                     model_config['alpha'], y_train,
                                     train_mask, stored_A=stored_A + '_A_I')
    elif model_config['Model'] == 10:
        y_train, train_mask = Model10(adj, model_config['s'],
                                      model_config['t'], model_config['alpha'],
                                      y_train, train_mask, features,
                                      stored_A=stored_A + '_A_H')
    elif model_config['Model'] == 11:
        y = np.sum(train_mask)
        label_per_sample, sample2label = Model11(y, y_train, train_mask)
    elif model_config['Model'] == 12:
        pass
    elif model_config['Model'] == 13:
        y_train, train_mask = Model9(adj, model_config['t'],
                                     model_config['alpha'], y_train,
                                     train_mask, stored_A=stored_A + '_A_I')
        y = np.sum(train_mask)
        label_per_sample, sample2label = Model11(y, y_train, train_mask)
    elif model_config['Model'] == 14:
        y = np.sum(train_mask)
        label_per_sample, sample2label = Model11(y, y_train, train_mask)
    elif model_config['Model'] == 15:
        y_train, train_mask = Model9(adj, model_config['t'],
                                     model_config['alpha'], y_train,
                                     train_mask, stored_A=stored_A + '_A_I')
        y = np.sum(train_mask)
        label_per_sample, sample2label = Model11(y, y_train, train_mask)
    elif model_config['Model'] == 16:
        # Self-training: run a sub-model in its own graph/session to produce
        # predictions, then add the most confident ones as pseudo-labels.
        # NOTE(review): this recursive call omits the positional
        # `repeat_state` argument and unpacks 3 of the 5 returned values --
        # confirm against the actual signature/return of this path.
        with tf.Graph().as_default():
            with tf.Session(config=tf.ConfigProto(
                    intra_op_parallelism_threads=model_config['threads'])
                            ) as sub_sess:
                tf.set_random_seed(seed)
                test_acc, test_acc_of_class, prediction = train(
                    model_config['Model_to_add_label'], sub_sess, seed,
                    data_split=data_split)
        y_train, train_mask = Model16(prediction, model_config['t'], y_train,
                                      train_mask)
        model_config = model_config['Model_to_predict']
        print('',
              'name : {}'.format(model_config['name']),
              'logdir : {}'.format(model_config['logdir']),
              'dataset : {}'.format(model_config['dataset']),
              'train_size : {}'.format(model_config['train_size']),
              'learning_rate : {}'.format(model_config['learning_rate']),
              'feature : {}'.format(model_config['feature']),
              'logging : {}'.format(model_config['logging']),
              sep='\n')
    elif model_config['Model'] == 17:
        # Label propagation only -- returns early without building the GCN.
        if model_config['smoothing'] is not None:
            stored_A = None
            adj = construct_knn_graph(features, model_config['k'])
        else:
            stored_A = stored_A + '_A_I'
            if model_config['drop_inter_class_edge']:
                stored_A = None
        test_acc, test_acc_of_class, prediction = Model17(
            adj, model_config['alpha'], y_train, train_mask, y_test,
            stored_A=stored_A)
        print("Test set results: accuracy= {:.5f}".format(test_acc))
        print("accuracy of each class=", test_acc_of_class)
        print("Total time={}s".format(time.time() - very_begining))
        return test_acc, test_acc_of_class, prediction, size_of_each_class, time.time(
        ) - very_begining
    elif model_config['Model'] == 18:
        y_train, train_mask = Model9(adj, model_config['t'],
                                     model_config['alpha'], y_train,
                                     train_mask, stored_A=stored_A + '_A_I')
        alpha = 1e-6
        test_acc, test_acc_of_class, prediction = Model17(
            adj, alpha, y_train, train_mask, y_test,
            stored_A=stored_A + '_A_I')
        print("Test set results: accuracy= {:.5f}".format(test_acc))
        print("accuracy of each class=", test_acc_of_class)
        return test_acc, test_acc_of_class, prediction
    elif model_config['Model'] == 19:
        # Same self-training pattern as Model 16 but via Model19.
        with tf.Graph().as_default():
            with tf.Session(config=tf.ConfigProto(
                    intra_op_parallelism_threads=model_config['threads'])
                            ) as sub_sess:
                tf.set_random_seed(seed)
                test_acc, test_acc_of_class, prediction = train(
                    model_config['Model_to_add_label'], sub_sess, seed,
                    data_split=data_split)
        stored_A = stored_A + '_A_I'
        # print(time.time()-very_begining)
        y_train, train_mask = Model19(prediction, model_config['t'], y_train,
                                      train_mask, adj, model_config['alpha'],
                                      stored_A, model_config['Model19'])
        # print(time.time()-very_begining)
        model_config = model_config['Model_to_predict']
        print('',
              'name : {}'.format(model_config['name']),
              'logdir : {}'.format(model_config['logdir']),
              'dataset : {}'.format(model_config['dataset']),
              'train_size : {}'.format(model_config['train_size']),
              'learning_rate : {}'.format(model_config['learning_rate']),
              'feature : {}'.format(model_config['feature']),
              'logging : {}'.format(model_config['logging']),
              sep='\n')
    elif model_config['Model'] == 20:
        pass
    elif model_config['Model'] == 21:
        pass
    elif model_config['Model'] == 22:
        alpha = model_config['alpha']
        stored_A = stored_A + '_A_I'
        features = Model22(adj, features, alpha, stored_A)
    elif model_config['Model'] == 23:
        # Classical classifier baselines (tree/SVM/CNN) -- returns early.
        if model_config['classifier'] == 'tree':
            clf = tree.DecisionTreeClassifier(
                max_depth=model_config['tree_depth'])
            t = time.time()
            clf.fit(features[train_mask], np.argmax(y_train[train_mask],
                                                    axis=1))
            t = time.time() - t
            prediction = clf.predict(features[test_mask])
        elif model_config['classifier'] == 'svm':
            clf = svm.SVC(
            )  #kernel='rbf', gamma=model_config['gamma'], class_weight='balanced', degree=model_config['svm_degree'])
            t = time.time()
            clf.fit(features[train_mask], np.argmax(y_train[train_mask],
                                                    axis=1))
            t = time.time() - t
            prediction = clf.predict(features[test_mask])
        elif model_config['classifier'] == 'cnn':
            prediction, t = cnn.train(model_config, features, train_mask,
                                      y_train, test_mask, y_test)
        else:
            raise ValueError(
                "model_config['classifier'] should be in ['svm', 'tree']")
        test_acc = np.sum(prediction == np.argmax(y_test[test_mask],
                                                  axis=1)) / np.sum(test_mask)
        # test_acc = test_acc[0]
        # Per-class accuracy via a one-hot expansion of the predictions.
        one_hot_prediction = np.zeros(y_test[test_mask].shape)
        one_hot_prediction[np.arange(one_hot_prediction.shape[0]),
                           prediction] = 1
        test_acc_of_class = np.sum(one_hot_prediction * y_test[test_mask],
                                   axis=0) / np.sum(y_test[test_mask], axis=0)
        #TODO
        print("Test set results: cost= {:.5f} accuracy= {:.5f} time= {:.5f}".
              format(0., test_acc, 0.))
        print("accuracy of each class=", test_acc_of_class)
        print("Total time={}s".format(time.time() - very_begining))
        return test_acc, test_acc_of_class, prediction, size_of_each_class, t
    elif model_config['Model'] == 26:
        adj = Model26(adj, model_config['t'], model_config['alpha'], y_train,
                      train_mask, stored_A=stored_A + '_A_I')
    elif model_config['Model'] == 28:
        features = Model28(adj, features, stored_A, model_config['k'])
    else:
        raise ValueError(
            '''model_config['Model'] must be in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,'''
            ''' 11, 12, 13, 14, 15, 16, 17, 18], but is {} now'''.format(
                model_config['Model']))
    # Some preprocessing
    # All-'f' (fully-connected) layer stacks use only the masked rows.
    if model_config['connection'] == [
            'f' for i in range(len(model_config['connection']))
    ]:
        train_features = features[train_mask]
        val_features = features[val_mask]
        test_features = features[test_mask]
    else:
        train_features = features
        val_features = features
        test_features = features
    if sparse.issparse(features):
        train_features = sparse_to_tuple(train_features)
        val_features = sparse_to_tuple(val_features)
        test_features = sparse_to_tuple(test_features)
        features = sparse_to_tuple(features)
    # --- Choose the graph-convolution support matrices. ---
    if model_config['Model'] == 12:
        if model_config['k'] < 0:
            # Derive k from the label rate when k is negative.
            if hasattr(model_config['train_size'], '__getitem__'):
                eta = 0
                for i in model_config['train_size']:
                    eta += i
                eta /= adj.shape[0]
            else:
                eta = model_config['train_size'] / 100
            k = (1 / eta)**(1 / len(model_config['connection']))
            k = int(k)
        else:
            k = model_config['k']
        model_config['name'] += '_k{}'.format(k)
        support = Model12(adj, k)
        num_supports = len(support)
    elif model_config['conv'] == 'taubin':
        support = [
            sparse_to_tuple(
                taubin_smoothor(adj, model_config['taubin_lambda'],
                                model_config['taubin_mu'],
                                model_config['taubin_repeat']))
        ]
        num_supports = 1
    elif model_config['conv'] == 'test21':
        support = [
            sparse_to_tuple(
                Test21(adj, model_config['alpha'],
                       beta=model_config['beta'],
                       stored_A=stored_A + '_A_I'))
        ]
        num_supports = 1
    elif model_config['conv'] == 'gcn':
        support = [preprocess_adj(adj)]
        num_supports = 1
    elif model_config['conv'] == 'gcn_unnorm':
        support = [sparse_to_tuple(adj.astype(np.float32))]
        num_supports = 1
    elif model_config['conv'] == 'gcn_noloop':
        support = [preprocess_adj(adj, loop=False)]
        num_supports = 1
    elif model_config['conv'] == 'gcn_rw':
        support = [preprocess_adj(adj, type='rw')]
        num_supports = 1
    elif model_config['conv'] in ['cheby', 'chebytheta']:
        # origin_adj_support = chebyshev_polynomials(origin_adj, model_config['max_degree'])
        support = chebyshev_polynomials(adj, model_config['max_degree'])
        num_supports = 1 + model_config['max_degree']
    else:
        raise ValueError('Invalid argument for model_config["conv"]: ' +
                         str(model_config['conv']))
    # Define placeholders
    placeholders = {
        'support': [
            tf.sparse_placeholder(tf.float32, name='support' + str(i))
            for i in range(num_supports)
        ],
        'features':
        tf.sparse_placeholder(tf.float32, name='features') if isinstance(
            features, tf.SparseTensorValue) else tf.placeholder(
                tf.float32, shape=[None, features.shape[1]], name='features'),
        'labels':
        tf.placeholder(tf.int32, name='labels', shape=(None,
                                                       y_train.shape[1])),
        'labels_mask':
        tf.placeholder(tf.int32, name='labels_mask'),
        'dropout':
        tf.placeholder_with_default(0., name='dropout', shape=()),
        'num_features_nonzero':
        tf.placeholder(tf.int32, name='num_features_nonzero'),
        # helper variable for sparse dropout
        'laplacian':
        tf.SparseTensor(indices=np.vstack([laplacian.row,
                                           laplacian.col]).transpose(),
                        values=laplacian.data,
                        dense_shape=laplacian.shape),
        'triplet':
        tf.placeholder(tf.int32, name='triplet', shape=(None, None)),
        'noise_sigma':
        tf.placeholder(tf.float32, name='noise_sigma'),
        'noise':
        tf.sparse_placeholder(tf.float32, name='features') if isinstance(
            features, tf.SparseTensorValue) else tf.placeholder(
                tf.float32, shape=[None, features.shape[1]], name='features')
    }
    if model_config['Model'] in [11, 13, 14, 15]:
        placeholders['label_per_sample'] = tf.placeholder(
            tf.float32,
            name='label_per_sample',
            shape=(None, label_per_sample.shape[1]))
        placeholders['sample2label'] = tf.placeholder(
            tf.float32,
            name='sample2label',
            shape=(label_per_sample.shape[1], y_train.shape[1]))
    # Create model
    model = GCN_MLP(model_config, placeholders, input_dim=train_features[2][1])
    # Random initialize
    sess.run(tf.global_variables_initializer())
    # Initialize FileWriter, saver & variables in graph
    # NOTE(review): these stay None here, yet the logdir branches below call
    # methods on them -- confirm they are assigned elsewhere before logging
    # is enabled, otherwise those branches raise AttributeError.
    train_writer = None
    valid_writer = None
    saver = None
    # Construct feed dictionary
    if model_config['connection'] == [
            'f' for i in range(len(model_config['connection']))
    ]:
        train_feed_dict = construct_feed_dict(
            train_features, support, y_train[train_mask],
            np.ones(train_mask.sum(), dtype=np.bool), triplet,
            model_config['noise_sigma'], placeholders)
        train_feed_dict.update(
            {placeholders['dropout']: model_config['dropout']})
        valid_feed_dict = construct_feed_dict(
            val_features, support, y_val[val_mask],
            np.ones(val_mask.sum(), dtype=np.bool), triplet, 0, placeholders)
        test_feed_dict = construct_feed_dict(
            test_features, support, y_test[test_mask],
            np.ones(test_mask.sum(), dtype=np.bool), triplet, 0, placeholders)
    else:
        train_feed_dict = construct_feed_dict(train_features, support,
                                              y_train, train_mask, triplet,
                                              model_config['noise_sigma'],
                                              placeholders)
        train_feed_dict.update(
            {placeholders['dropout']: model_config['dropout']})
        valid_feed_dict = construct_feed_dict(val_features, support, y_val,
                                              val_mask, triplet, 0,
                                              placeholders)
        test_feed_dict = construct_feed_dict(test_features, support, y_test,
                                             test_mask, triplet, 0,
                                             placeholders)
    if model_config['Model'] in [11, 13, 14, 15]:
        train_feed_dict.update(
            {placeholders['label_per_sample']: label_per_sample})
        train_feed_dict.update({placeholders['sample2label']: sample2label})
        valid_feed_dict.update(
            {placeholders['label_per_sample']: label_per_sample})
        valid_feed_dict.update({placeholders['sample2label']: sample2label})
        test_feed_dict.update(
            {placeholders['label_per_sample']: label_per_sample})
        test_feed_dict.update({placeholders['sample2label']: sample2label})
    # tmp = sess.run([model.prediction, model.sample2label], feed_dict=test_feed_dict)
    # Some support variables
    valid_loss_list = []
    max_valid_acc = 0
    max_train_acc = 0
    # Baseline test evaluation with the untrained model.
    t_test = time.time()
    test_cost, test_acc, test_acc_of_class, prediction = sess.run(
        [
            model.loss, model.accuracy, model.accuracy_of_class,
            model.prediction
        ],
        feed_dict=test_feed_dict)
    test_duration = time.time() - t_test
    timer = 0
    begin = time.time()
    # print(time.time() - very_begining)
    if model_config['train']:
        # Train model
        print('training...')
        for step in range(model_config['epochs']):
            # Models 20/21 add pseudo-labels halfway through training.
            if model_config['Model'] in [
                    20, 21
            ] and step == model_config['epochs'] / 2:
                stored_A = stored_A + '_A_I'
                y_train, train_mask = Model20(prediction, model_config['t'],
                                              y_train, train_mask, adj,
                                              model_config['alpha'], stored_A)
                if model_config['Model'] == 21:
                    y_train, train_mask = Model16(prediction,
                                                  model_config['t2'], y_train,
                                                  train_mask)
                # NOTE(review): this construct_feed_dict call passes one fewer
                # argument (no `triplet`) than every other call site --
                # confirm the intended signature.
                train_feed_dict = construct_feed_dict(
                    features, support, y_train, train_mask,
                    model_config['noise_sigma'], placeholders)
                train_feed_dict.update(
                    {placeholders['dropout']: model_config['dropout']})
                max_valid_acc = 0
                max_train_acc = 0
            # Training step
            if model_config['logdir'] and step % 100 == 0:
                # Traced run every 100 steps for Chrome-timeline profiling.
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
                t = time.time()
                sess.run(model.opt_op,
                         feed_dict=train_feed_dict,
                         options=run_options,
                         run_metadata=run_metadata)
                t = time.time() - t
                train_writer.add_run_metadata(run_metadata, 'step%d' % step)
                # Create the Timeline object, and write it to a json
                with open(path.join(model_config['logdir'], 'timeline.json'),
                          'w') as f:
                    f.write(
                        timeline.Timeline(run_metadata.step_stats).
                        generate_chrome_trace_format())
            else:
                t = time.time()
                # Inject Gaussian feature noise for this step.
                if isinstance(train_features, tf.SparseTensorValue):
                    train_feed_dict.update({
                        placeholders['features']:
                        tf.SparseTensorValue(
                            train_features.indices, train_features.values +
                            np.random.normal(0, model_config['noise_sigma'],
                                             train_features.indices.shape[0]),
                            train_features.dense_shape)
                    })
                else:
                    train_feed_dict.update({
                        placeholders['features']:
                        train_features +
                        np.random.normal(0, model_config['noise_sigma'],
                                         train_features.shape)
                    })
                sess.run(model.opt_op, feed_dict=train_feed_dict)
                t = time.time() - t
            timer += t
            train_loss, train_acc, train_summary = sess.run(
                [model.loss, model.accuracy, model.summary],
                feed_dict=train_feed_dict)
            # Logging
            if model_config['logdir']:
                # NOTE(review): valid_summary is only assigned in the
                # `validate` branch below -- confirm logdir implies validate.
                global_step = model.global_step.eval(session=sess)
                train_writer.add_summary(train_summary, global_step)
                valid_writer.add_summary(valid_summary, global_step)
            # If it's best performence so far, evalue on test set
            if model_config['validate']:
                valid_loss, valid_acc, valid_summary = sess.run(
                    [model.loss, model.accuracy, model.summary],
                    feed_dict=valid_feed_dict)
                valid_loss_list.append(valid_loss)
                if valid_acc >= max_valid_acc:
                    max_valid_acc = valid_acc
                    t_test = time.time()
                    test_cost, test_acc, test_acc_of_class = sess.run(
                        [model.loss, model.accuracy, model.accuracy_of_class],
                        feed_dict=test_feed_dict)
                    test_duration = time.time() - t_test
                    prediction = sess.run(model.prediction, train_feed_dict)
                    if args.verbose:
                        print('*', end='')
            else:
                # No validation set: select the model by training accuracy.
                if train_acc >= max_train_acc:
                    max_train_acc = train_acc
                    t_test = time.time()
                    test_cost, test_acc, test_acc_of_class = sess.run(
                        [model.loss, model.accuracy, model.accuracy_of_class],
                        feed_dict=test_feed_dict)
                    test_duration = time.time() - t_test
                    prediction = sess.run(model.prediction, train_feed_dict)
                    if args.verbose:
                        print('*', end='')
            # Print results
            if args.verbose:
                print("Epoch: {:04d}".format(step),
                      "train_loss= {:.3f}".format(train_loss),
                      "train_acc= {:.3f}".format(train_acc),
                      end=' ')
                if model_config['validate']:
                    print("val_loss=", "{:.3f}".format(valid_loss),
                          "val_acc= {:.3f}".format(valid_acc),
                          end=' ')
                print("time=", "{:.5f}".format(t))
            # Early stopping: stop when the latest validation loss exceeds
            # the mean of the last `early_stopping` losses.
            if 0 < model_config['early_stopping'] < step \
                    and valid_loss_list[-1] > np.mean(valid_loss_list[-(model_config['early_stopping'] + 1):-1]):
                print("Early stopping...")
                break
        else:
            # for/else: runs only when the loop finished without break.
            print("Optimization Finished!")
    # Testing
    print("Test set results:", "cost=", "{:.5f}".format(test_cost),
          "accuracy=", "{:.5f}".format(test_acc), "time=",
          "{:.5f}".format(test_duration))
    print("accuracy of each class=", test_acc_of_class)
    # Saving
    if model_config['logdir']:
        print('Save model to "{:s}"'.format(
            saver.save(sess=sess,
                       save_path=path.join(model_config['logdir'],
                                           'model.ckpt'),
                       global_step=global_step)))
    print("Total time={}s".format(time.time() - very_begining))
    return test_acc, test_acc_of_class, prediction, size_of_each_class, time.time(
    ) - begin
def lenet_bn(lr, simple=True):
    """Build a batch-normalized LeNet and train it on the module-level dataset.

    Args:
        lr: learning rate for the Adam optimizer.
        simple: forwarded to LeNetBatchNormalization; selects which
            batch-norm implementation the network uses.
    """
    # Relies on module-level globals: device, dataset, criterion, train.
    model = LeNetBatchNormalization(simple=simple).net.to(device)
    opt = optim.Adam(model.parameters(), lr=lr)
    train(model, dataset, criterion, opt)
# NOTE(review): `net` comes from surrounding context not visible in this chunk
# -- presumably the model whose per-layer output shapes we want to inspect.
if net:
    logging.info('测试 DenseNet ...')
    # Push a dummy 1-channel 96x96 image through each top-level child module
    # and log the tensor shape after every stage.
    X = torch.rand((1, 1, 96, 96)).to(device)
    for name, layer in net.named_children():
        X = layer(X)
        logging.info(f'{name} output shape:\t{X.shape}')


if __name__ == '__main__':
    # Shared hyper-parameters for every experiment below.
    batch_size, lr = 256, 0.001
    dataset = Dataset(batch_size)
    criterion = nn.CrossEntropyLoss()

    # LeNet with a hand-written BatchNorm implementation
    # (log message: "BatchNormalization implemented from scratch").
    logging.info('BatchNormalization 从零开始实现 ...')
    lenet_bn(lr)

    # LeNet using the built-in BatchNorm layers
    # (log message: "BatchNormalization concise implementation").
    # NOTE(review): simple=False selects the "concise" variant here while the
    # default selects "from scratch" -- confirm the flag semantics against
    # LeNetBatchNormalization.
    logging.info('BatchNormalization 简洁实现 ...')
    lenet_bn(lr, simple=False)

    logging.info('ResNet ...')
    resnet = ResNet().net.to(device)
    optimizer = optim.Adam(resnet.parameters(), lr=lr)
    train(resnet, dataset, criterion, optimizer)

    logging.info('DenseNet ...')
    # dataset = Dataset(batch_size, resize=96)
    densenet = DenseNet().net.to(device)
    optimizer = optim.Adam(densenet.parameters(), lr=lr)
    train(densenet, dataset, criterion, optimizer)
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Train Module
# Module to train a CNN model on character data.

import numpy as np

import cnn
import utility

# Parse CLI arguments and load the raw character image dataset.
input_arguments = utility.parse_input_arguments(module="train")
images, labels = utility.load_data(input_arguments.image_path)
class_count = len(np.unique(labels))

# Preprocess, shuffle, and split into train/test (80/20).
images, labels = utility.preprocess(images, labels)
images, labels = utility.shuffle(images, labels)
x_train, x_test, y_train, y_test = utility.split(images, labels, test_size=0.2)

# FIX: the original rebound the imported module name (`cnn = cnn.CNN(...)`),
# shadowing the `cnn` module for the rest of the script.  Bind the model
# instance to a distinct name instead.
model = cnn.CNN(x_train.shape[1], x_train.shape[2], x_train.shape[3],
                class_count)
model.summary()
model.train(x_train, y_train,
            epochs=input_arguments.epochs,
            batch_size=input_arguments.batch_size,
            validation_split=input_arguments.validation_split,
            output_path=input_arguments.output_path)
model.test(x_test, y_test, output_path=input_arguments.output_path)
def main(state, channel):
    """Load the configured dataset and dispatch training to the right model.

    `state` is an experiment-configuration object read for the dataset name,
    model type ('nnet'/'cnn'/other), feature flags, and GPU usage; `channel`
    is forwarded untouched to the trainers.  Returns 0 on completion.
    """
    # Load the MNIST dataset.
    print 'Loading MNIST from '
    # NOTE(review): the triple-quoted block below is dead code preserved as a
    # bare string literal (an expression statement with no runtime effect).
    '''
    mnist = fetch_mldata('MNIST original', data_home=data_dir)

    # Split the data into train, valid and test sets.
    # TODO: add Scaling, normalization options.
    # reference: https://github.com/rosejn/torch-datasets/blob/master/dataset/mnist.lua
    # scaling: scale values between [0,1] (by default, they are in the range [0, 255])
    # TODO: try a [-1, 1] scaling which according to this post gives better results for
    # the svm: http://peekaboo-vision.blogspot.ca/2010/09/mnist-for-ever.html
    # Test that the test sets is the same as the one found in Yann LeCun's page.
    train_valid_x = mnist.data[:-10000, :] / scale
    train_valid_y = mnist.target[:-10000]
    test_x = mnist.data[-10000:, :] / scale
    test_y = mnist.target[-10000:]
    del mnist

    # Shuffle the train, valid and test sets since they are ordered.
    train_valid_x, train_valid_y = shuffle(train_valid_x, train_valid_y, random_state=random_state)
    test_x, test_y = shuffle(test_x, test_y)
    '''
    dataset = None
    data_path = None
    splits = None
    if state.features is None:
        # Raw-feature path: resolve the dataset location from its name.
        if state.dataset == 'mnist':
            dataset = os.path.join(data_dir, 'mnist.pkl.gz')
            splits = [train_size, valid_size, test_size]
            # NOTE(review): data_path is still None at this point, so this
            # prints "... from None"; `dataset` was probably intended.
            print 'Loading the MNIST dataset from %s' %data_path
        elif state.dataset in ['mq+diff+std+top10']:
            data_path = os.path.join(data_dir, 'MQ', 'standardized', 'diff_augmented')
            print 'Loading the augmented standardized MQ dataset from %s' %data_path
        elif state.dataset in ['mq+diff+log+top10']:
            data_path = os.path.join(data_dir, 'MQ', 'log_normalized', 'diff_augmented')
            print 'Loading the augmented log-normalized MQ dataset from %s' %data_path
        elif state.dataset in ['mq+diff+log+std+top10']:
            data_path = os.path.join(data_dir, 'MQ', 'log_normalized+standardized', 'diff_augmented')
            print 'Loading the augmented log-normalized+standardized MQ dataset from %s' %data_path
        elif state.dataset in ['mq+diff+std+log+top10']:
            data_path = os.path.join(data_dir, 'MQ', 'standardized+log_normalized', 'diff_augmented')
            print 'Loading the augmented standardized+log-normalized MQ dataset from %s' %data_path
        else :
            raise NotImplementedError('Datatset %s not supported!'%state.dataset)

        # Neural-net models are assumed to need the GPU data layout.
        if state.model in ['nnet', 'cnn']:
            state.gpu = True
            print 'GPU should be enabled'

        # TODO: check how to retrieve the gpu status.
        if state.gpu:
            #print 'GPU enabled'
            print 'Loading dataset in shared variables'
        else:
            #print 'GPU disabled'
            print 'Loading dataset in numpy array'

        # load_data returns [(train_x, train_y), (valid_x, valid_y),
        # (test_x, test_y)]; `shared` selects Theano shared variables vs
        # plain numpy arrays.
        datasets = load_data(dataset=dataset, data_path=data_path, splits=splits, shared=state.gpu, state=state)

        train_x, train_y = datasets[0]
        valid_x, valid_y = datasets[1]
        test_x, test_y = datasets[2]
    else:
        # Precomputed-HOG path: features from .npy files, labels from the
        # pickled MNIST splits.
        print 'Using HOG features'
        assert state.dataset == 'mnist'
        data_path = os.path.join(data_dir, 'mnist.pkl.gz')
        f = gzip.open(data_path, 'rb')
        train_set, valid_set, test_set = cPickle.load(f)
        f.close()
        train_x = numpy.load(os.path.join(data_dir, 'train_set_hog_features.npy'))
        valid_x = numpy.load(os.path.join(data_dir, 'valid_set_hog_features.npy'))
        test_x = numpy.load(os.path.join(data_dir, 'test_set_hog_features.npy'))
        train_y = train_set[1]
        valid_y = valid_set[1]
        test_y = test_set[1]

    #train_x = train_x[0:1000,:]
    #train_y = train_y[0:1000]
    #import pdb; pdb.set_trace()

    # Cross-validation.
    # NOTE(review): disabled -- kept as a dead string literal, like the
    # MNIST-fetching block above.
    '''
    if cv_strategy == 'KFold':
        assert len(valid_x) > 0
        print 'KFold used'
        # Concatenate both the train and validation sets.
        train_valid_x = numpy.concatenate((train_x, valid_x), axis=0)
        train_valid_y = numpy.concatenate((train_y, valid_y), axis=0)
        kf = cross_validation.KFold(len(train_valid_x), n_folds=9)
        for train_index, valid_index in kf:
            train_x, valid_x = train_valid_x[train_index], train_valid_x[valid_index]
            train_y, valid_y = train_valid_y[train_index], train_valid_y[valid_index]
            train(state, channel, train_x, train_y, valid_x, valid_y, test_x, test_y)
    elif cv_strategy is None:
        print 'No cross-validation'
        train(state, channel, train_x, train_y, valid_x, valid_y, test_x, test_y)
    else:
        raise NotImplementedError('Cross-validation type not supported.')
    '''
    # NOTE(review): "Confing" is a typo for "Config" in this runtime string;
    # left as-is here (doc-only pass).
    print 'Confing ', state
    # Start timer for training.
    start = time.time()
    # Dispatch on model type; each trainer gets the same (state, channel,
    # splits) signature.
    if state.model == 'nnet':
        status = mlp.train(state, channel, train_x, train_y, valid_x, valid_y, test_x, test_y)
    elif state.model == 'cnn':
        status = cnn.train(state, channel, train_x, train_y, valid_x, valid_y, test_x, test_y)
    else:
        status = train(state, channel, train_x, train_y, valid_x, valid_y, test_x, test_y)
    stop = time.time()
    print 'It took %s minutes'%( (stop-start) / float(60) )
    if state.save_state:
        print 'We will save the experiment state'
        dump_tar_bz2(state, 'state.tar.bz2')
    return 0
def predict(self, data: np.ndarray) -> np.ndarray:
    """Predict integer labels for `data`, processing it in fixed-size batches.

    NOTE(review): assumes `data` is shaped (n, 2, 247, 247, 3) -- pairs of
    RGB images -- matching the padding buffer below; confirm against caller.
    """
    n = data.shape[0]
    label = np.empty(shape=(n,), dtype=int)
    # out = np.empty(shape=(n,), dtype=int)
    # Run all full batches through the network.
    for i in range(n // self.batch_size):
        out = self.forward(data[i * self.batch_size:(i + 1) * self.batch_size])
        label[i * self.batch_size:(i + 1) * self.batch_size] = self.get_label(out)
    # Remainder: pad the final partial batch up to batch_size with an
    # uninitialized buffer, then keep only the labels for the real rows.
    if n % self.batch_size != 0:
        start = n // self.batch_size * self.batch_size
        rest = n - n // self.batch_size * self.batch_size
        rest_batch = np.empty((self.batch_size, 2, 247, 247, 3), dtype=floatX)
        rest_batch[:rest] = data[start:]
        out = self.forward(rest_batch)
        label[start:] = self.get_label(out)
    return label


if __name__ == '__main__':
    # Smoke test on synthetic data instead of the real image set:
    # data, label = preprocess.get_img()
    # data = data[:batch_size]
    # label = label[:batch_size]
    data = np.empty(shape=(batch_size, 2, 247, 247, 3), dtype=floatX)
    label = np.empty(shape=(batch_size,), dtype=int)
    # First half: identical image pairs -> label 1 (matching pair).
    data[:batch_size // 2, 0] = np.random.randn(batch_size // 2, 247, 247, 3)
    data[:batch_size // 2, 1] = data[:batch_size // 2, 0]
    label[:batch_size // 2] = 1
    # Second half: independent random pairs -> label 0 (non-matching).
    data[batch_size // 2:] = np.random.randn(batch_size // 2, 2, 247, 247, 3)
    label[batch_size // 2:] = 0
    cnn = CNN(batch_size=batch_size)
    cnn.train(data, label, 1000)
def train():
    """Build the CNN graph and run the TF1 training loop until max_steps.

    Logs loss/accuracy every `config.summary_every_n_steps` steps and saves
    a checkpoint every `config.ckpt_every_n_steps` steps.
    """
    with tf.Graph().as_default():
        log('===== START TRAIN RUN: ' + str(datetime.now()) + '=====')
        global_step = tf.Variable(0, trainable=False)
        # get examples and labels
        examples, labels = cnn.inputs(data_type='train')
        # build graph to compute logits
        logits = cnn.inference(examples)
        # compute loss
        loss, losses_collection = cnn.loss(logits, labels)
        accuracy = cnn.accuracy(logits, labels)
        # train model with one batch of examples
        train_op = cnn.train(loss, global_step)
        # create saver
        saver = tf.train.Saver(tf.all_variables())
        # build summary and init op
        summary_op = tf.merge_all_summaries()
        init_op = tf.initialize_all_variables()
        # start session
        # sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
        sess = tf.Session()
        sess.run(init_op)
        # start queue runners
        tf.train.start_queue_runners(sess=sess)
        # set up summary writers
        train_writer = tf.train.SummaryWriter(config.train_dir, sess.graph)
        for step in xrange(config.max_steps):
            start_time = time.time()
            summary, loss_value, accuracy_value, _ = sess.run([summary_op, loss, accuracy, train_op])
            # NOTE(review): this issues one extra sess.run per loss term on
            # EVERY step, not just logging steps -- potentially costly.
            loss_breakdown = [(str(l.op.name), sess.run(l)) for l in losses_collection]
            duration = time.time() - start_time
            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
            if step % config.summary_every_n_steps == 0:
                # summaries
                examples_per_sec = config.batch_size / duration
                sec_per_batch = float(duration)
                train_writer.add_summary(summary, step)
                log_str_1 = ('%s: step %d, loss = %.3f (%.2f examples/sec; %.3f sec/batch), accuracy %.3f ') % (datetime.now(), step, loss_value, examples_per_sec, sec_per_batch, accuracy_value)
                log_str_1 += str(loss_breakdown)  # print loss breakdown
                log(log_str_1)
                # ru_maxrss units are platform-dependent (kilobytes on Linux,
                # bytes on macOS), so the "Mb" figure is approximate.
                log("memory usage: {} Mb".format(float(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)/1000000.0))
            if (step % config.ckpt_every_n_steps == 0) and (step>0):
                # save weights to file & validate
                checkpoint_path = os.path.join(config.checkpoint_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
                log("Checkpoint saved at step %d" % step)
cv += 1 print "cross validation fold %d" % (cv) trvin = vinlist[tr] tstvin = vinlist[tst] svmtrain = filter(lambda x: x['vin'] in trvin, svmdata) svmtest = filter(lambda x: x['vin'] in tstvin, svmdata) cnntrain = {} cnntest = {} for k in cnndata.keys(): if (k in trvin): cnntrain[k] = cnndata[k] if (k in tstvin): cnntest[k] = cnndata[k] svm.train(svmtrain) cnn.train(cnntrain) svmclassify = svm.classify(svmtest) svmres = svmclassify['detail'] svmacc = svmclassify['accuracy'] cnnclassify = cnn.classify(cnntest) cnnres = cnnclassify['detail'] cnnacc = cnnclassify['accuracy'] print "standalone classifier accuracy: svm -- %f , cnn -- %f" % (svmacc, cnnacc) pred = {} for each in svmres: vin = each['vin'] svm_proba = each['proba_predicted'] cnn_proba = cnnres[vin]['predsum']
def train():
    """Build the CNN graph and run the TF1 training loop.

    Resumes from the newest checkpoint in CHECKPOINT_DIR when one exists
    (restoring weights and the global step); otherwise starts fresh.
    Writes summaries and periodic checkpoints to TRAIN_DIR.
    """
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        # Input pipeline and model graph.
        images, labels = cnn.distorted_inputs()
        logits = cnn.inference(images)
        loss = cnn.loss(logits, labels)
        train_op = cnn.train(loss, global_step)

        summary_op = tf.merge_all_summaries()
        init = tf.initialize_all_variables()
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=LOG_DEVICE_PLACEMENT))
        saver = tf.train.Saver(tf.all_variables())

        if tf.gfile.Exists(TRAIN_DIR):
            ckpt = tf.train.get_checkpoint_state(CHECKPOINT_DIR)
            # FIX: get_checkpoint_state() returns None when no checkpoint is
            # found; the original dereferenced ckpt.model_checkpoint_path
            # BEFORE the `if ckpt` guard, raising AttributeError in that case.
            if ckpt and ckpt.model_checkpoint_path:
                last_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                ckpt_dir = os.path.join(CHECKPOINT_DIR, "model.ckpt-" + last_step)
                tf.gfile.DeleteRecursively(TRAIN_DIR)
                saver.restore(sess, ckpt_dir)
                # Continue counting steps where the checkpoint left off.
                sess.run(global_step.assign(int(last_step)))
                print ("Read old model from: ", ckpt_dir)
                print ("Starting training at: ", sess.run(global_step))
            else:
                tf.gfile.DeleteRecursively(TRAIN_DIR)
                sess.run(init)
                print ("No model found. Starting training at: ", sess.run(global_step))
        else:
            tf.gfile.MakeDirs(TRAIN_DIR)
            sess.run(init)
            print ("No folder found. Starting training at: ", sess.run(global_step))

        print ("Writing train results to: ", TRAIN_DIR)
        print ("Train file: ", TRAIN_FILE)

        # Start the queue runners feeding the input pipeline.
        tf.train.start_queue_runners(sess=sess)
        summary_writer = tf.train.SummaryWriter(TRAIN_DIR, graph_def=sess.graph_def)

        for step in xrange(sess.run(global_step), MAX_STEPS):
            start_time = time.time()
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            # Merged the two identical `step % 10 == 0` guards the original
            # had back-to-back: log throughput AND write the summary here.
            if step % 10 == 0:
                examples_per_sec = BATCH_SIZE / duration
                sec_per_batch = float(duration)
                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                              'sec/batch)')
                print (format_str % (datetime.now(), step, loss_value,
                                     examples_per_sec, sec_per_batch))
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % 1000 == 0 or (step + 1) == MAX_STEPS:
                checkpoint_path = os.path.join(TRAIN_DIR, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
# max_input_length = int(fid.read()) featreader = feature_reader.FeatureReader(featdir + '/feats_shuffled.scp', featdir + '/cmvn.scp', featdir + '/utt2spk', int(cnn_conf['context_width']), max_input_length) # create a target coder coder = target_coder.AlignmentCoder(lambda x, y: x, num_labels) # lda在哪里做? dispenser = batchdispenser.AlignmentBatchDispenser(featreader, coder, int(cnn_conf['batch_size']), input_dim, alifile) #train the neural net print('------- training neural net ----------') #create the neural net cnn = cnn.Cnn(input_dim, num_labels, total_frames, cnn_conf) cnn.train(dispenser) # if TEST_NNET: # #use the neural net to calculate posteriors for the testing set # print '------- computing state pseudo-likelihoods ----------' # savedir = config.get('directories', 'expdir') + '/' + config.get('nnet', 'name') # decodedir = savedir + '/decode' # if not os.path.isdir(decodedir): # os.mkdir(decodedir) # featdir = config.get('directories', 'test_features') + '/' + config.get('dnn-features', 'name') # #create a feature reader # with open(featdir + '/maxlength', 'r') as fid:
import torch.nn.functional as F
from torch.autograd import Variable
import torchvision.datasets as dset
import torchvision.transforms as transforms
import numpy as np
from PIL import ImageOps
import torch.optim as optim
from PIL import Image
#import matplotlib.pyplot as plt
import time
import cnn


if __name__ == "__main__":
    # Resize every character image to 32x32 and turn it into a tensor.
    preprocessing = transforms.Compose(
        [transforms.Resize((32, 32)), transforms.ToTensor()])

    # Char74k images on disk, read through the project's PIL loader.
    char_dataset = dset.ImageFolder('./char74k',
                                    transform=preprocessing,
                                    loader=cnn.pil_loader)
    batches = torch.utils.data.DataLoader(char_dataset,
                                          batch_size=128,
                                          shuffle=True,
                                          num_workers=2)

    # Model on GPU; Adam with a slightly enlarged eps.
    net = cnn.Net()
    net.cuda()
    adam = optim.Adam(net.parameters(), lr=1e-4, eps=1e-4)

    # Ten epochs over the dataset, then persist the learned weights.
    for epoch in range(10):
        cnn.train(epoch, net, adam, batches)
    torch.save(net.state_dict(), 'char_recognizer.pt')