def load_train_data(start, stop):
    """Load training point clouds and labels from the HDF5 files in files[start:stop]."""
    cwd = Path(os.path.abspath(os.path.dirname(__file__)))
    data = cwd / 'data'
    hdf5_dir = (data / 'hdf5_data').as_posix()
    train_hdf5_dir = os.path.join(hdf5_dir, 'train')
    files = provider.getDataFiles(os.path.join(train_hdf5_dir, 'all_files.txt'))

    data_batch_list = []
    label_batch_list = []
    count = 0
    for h5_filename in files[start:stop]:
        data_batch, label_batch = provider.load_h5(h5_filename)
        # data_batch = provider.jitter_point_cloud(data_batch)
        print(f'h5_filename = {h5_filename}')
        print(f'data_batch.shape = {data_batch.shape}')
        count += data_batch.shape[0]
        data_batch_list.append(data_batch)
        label_batch_list.append(label_batch)
    data_batches = np.concatenate(data_batch_list, 0)
    label_batches = np.concatenate(label_batch_list, 0)
    print(f'data_batches.shape = {data_batches.shape}')
    print(f'label_batches.shape = {label_batches.shape}')
    print(f'count = {count}')

    train_idxs = list(range(0, count))
    train_data = data_batches[train_idxs, ...]
    train_label = label_batches[train_idxs]
    print(f'train_data.shape, train_label.shape = {train_data.shape}, {train_label.shape}')
    return train_data, train_label
def load_test_data():
    """Load test data."""
    cwd = Path(os.path.abspath(os.path.dirname(__file__)))
    data = cwd / 'data'
    hdf5_dir = (data / 'hdf5_data').as_posix()
    test_hdf5_dir = os.path.join(hdf5_dir, 'test')
    test_files = provider.getDataFiles(os.path.join(test_hdf5_dir, 'all_files.txt'))

    data_batch_list = []
    label_batch_list = []
    count = 0
    for h5_filename in test_files:
        data_batch, label_batch = provider.load_h5(h5_filename)
        # data_batch = provider.jitter_point_cloud(data_batch)
        print(f'h5_filename = {h5_filename}')
        print(f'data_batch.shape = {data_batch.shape}')
        count += data_batch.shape[0]
        data_batch_list.append(data_batch)
        label_batch_list.append(label_batch)
    data_batches = np.concatenate(data_batch_list, 0)
    label_batches = np.concatenate(label_batch_list, 0)
    print(f'data_batches.shape = {data_batches.shape}')
    print(f'label_batches.shape = {label_batches.shape}')

    test_idxs = list(range(0, count))
    print(f'len(test_idxs) = {len(test_idxs)}')
    test_data = data_batches[test_idxs, ...]
    test_label = label_batches[test_idxs]
    print(f'test_data.shape, test_label.shape = {test_data.shape}, {test_label.shape}')
    return test_data, test_label
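# For reference, a minimal usage sketch of the two loaders above. The
# start/stop indices are hypothetical; any slice of the training file list
# works the same way.
train_data, train_label = load_train_data(start=0, stop=4)
test_data, test_label = load_test_data()
# Both splits should share the same per-cloud shape (points, features).
assert train_data.shape[1:] == test_data.shape[1:]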
def train_one_epoch(sess, ops, train_writer):
    """ ops: dict mapping from string to tf ops """
    is_training = True

    # Shuffle train samples
    train_idxs = np.arange(0, len(TRAIN_FILES))
    np.random.shuffle(train_idxs)

    total_correct = total_seen = total_sig = loss_sum = 0
    for fn in range(len(TRAIN_FILES)):
        # log_string('----' + str(fn) + '-----')
        current_file = os.path.join(H5_DIR, TRAIN_FILES[train_idxs[fn]])
        current_data, current_label, global_pl = provider.load_h5(
            current_file, 'seg', glob=True)
        current_data, current_label, current_global, _ = provider.shuffle_data(
            current_data, np.squeeze(current_label), global_pl=global_pl)
        current_label = np.squeeze(current_label)

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE
        # log_string(str(datetime.now()))

        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx + 1) * BATCH_SIZE
            batch_data, batch_label, batch_global = get_batch(
                current_data, current_label, current_global, start_idx, end_idx)
            feed_dict = {
                ops['pointclouds_pl']: batch_data,
                ops['labels_pl']: batch_label,
                ops['is_training_pl']: is_training,
                ops['global_pl']: batch_global,
            }
            summary, step, _, loss_val, pred_val, coefs = sess.run(
                [ops['merged'], ops['step'], ops['train_op'],
                 ops['loss'], ops['pred'], ops['coefs']],
                feed_dict=feed_dict)
            train_writer.add_summary(summary, step)
            pred_val = np.argmax(pred_val, 2)
            total_sig += np.sum(batch_label)
            correct = np.sum(pred_val == batch_label)
            total_correct += correct
            total_seen += BATCH_SIZE * NUM_POINT
            loss_sum += np.mean(loss_val)

    log_string('mean loss: %f' % (loss_sum / float(num_batches)))
    log_string('accuracy: %f' % (total_correct / float(total_seen)))
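# get_batch is used throughout these loops but is not defined in this section.
# A plausible sketch, assuming it simply slices the in-memory arrays; this
# matches the five-argument calls above, and the scripts that pass only data
# and labels would use the obvious two-array variant.
def get_batch(data, label, global_feats, start_idx, end_idx):
    """Slice one batch out of the per-file arrays (assumed implementation)."""
    batch_data = data[start_idx:end_idx, :, :]
    batch_label = label[start_idx:end_idx]
    batch_global = global_feats[start_idx:end_idx]
    return batch_data, batch_label, batch_global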
def __init__(self,
             root,
             batch_size=16,
             npoints=1024,
             split='train',
             shuffle=True,
             augment=True):
    '''
    root: file path of data
    batch_size: batch size
    npoints: number of points
    split: 'train' or 'test'
    shuffle: if True, shuffle the dataset
    augment: if True, do data augmentation
    '''
    self.root = root
    self.batch_size = batch_size
    self.npoints = npoints
    self.split = split
    self.augment = augment
    # If shuffle is not given explicitly, shuffle only the training split.
    # (The original tested `split is None` and used `==` instead of `=`,
    # so self.shuffle was never assigned on that path.)
    if shuffle is None:
        self.shuffle = (split == 'train')
    else:
        self.shuffle = shuffle

    # load category names
    self.catfile = os.path.join(self.root, 'shape_names.txt')
    self.cat = [line.rstrip() for line in open(self.catfile)]

    # load data paths
    shape_ids = {}
    shape_ids['train'] = [
        line.rstrip()
        for line in open(os.path.join(self.root, 'train_files.txt'))
    ]
    shape_ids['test'] = [
        line.rstrip()
        for line in open(os.path.join(self.root, 'test_files.txt'))
    ]

    # It's a small dataset, so load everything into memory.
    # Read each HDF5 file once and unpack both data and labels.
    loaded = [provider.load_h5(x) for x in shape_ids[split]]
    datas = [x[0] for x in loaded]
    labels = [x[1] for x in loaded]
    self.datas = np.concatenate(datas, axis=0)  # concatenate list to numpy array
    self.labels = np.concatenate(labels, axis=0)
    self.on_epoch_end()
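# The constructor ends by calling self.on_epoch_end(), which is not shown in
# this section. A minimal sketch of what it presumably does: rebuild the
# sample index order, reshuffling when self.shuffle is set.
def on_epoch_end(self):
    # Refresh the index order after each epoch (assumed implementation).
    self.indices = np.arange(len(self.datas))
    if self.shuffle:
        np.random.shuffle(self.indices)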
def train_one_epoch(sess, ops, train_writer):
    """ ops: dict mapping from string to tf ops """
    is_training = True
    loss_sum = 0

    current_data_pl, current_label = provider.load_h5(TRAIN_FILE, 'class')  # nevts=5e5
    if multi:
        current_label = np.argmax(current_label, axis=-1)
    current_data_pl, current_label, _ = provider.shuffle_data(
        current_data_pl, np.squeeze(current_label))

    file_size = current_data_pl.shape[0]
    num_batches = file_size // BATCH_SIZE
    # num_batches = 4

    log_string(str(datetime.now()))
    for batch_idx in range(num_batches):
        start_idx = batch_idx * BATCH_SIZE
        end_idx = (batch_idx + 1) * BATCH_SIZE
        batch_data_pl, batch_label = get_batch(current_data_pl, current_label,
                                               start_idx, end_idx)
        # Entries whose third feature is exactly 0 are zero-padded points.
        mask_padded = batch_data_pl[:, :, 2] == 0
        feed_dict = {
            ops['pointclouds_pl']: batch_data_pl,
            ops['labels_pl']: batch_label,
            ops['mask_pl']: mask_padded.astype(float),
            ops['is_training']: is_training,
        }
        summary, step, _, loss, attention = sess.run(
            [ops['merged'], ops['step'], ops['train_op'],
             ops['loss'], ops['attention']],
            feed_dict=feed_dict)
        train_writer.add_summary(summary, step)
        loss_sum += np.mean(loss)

    log_string('mean loss: %f' % (loss_sum / float(num_batches)))
def eval_one_epoch(sess, ops):
    is_training = False
    y_pred = []

    current_data_pl, current_label = provider.load_h5(EVALUATE_FILE, 'class')
    if multi:
        current_label = np.argmax(current_label, axis=-1)

    file_size = current_data_pl.shape[0]
    num_batches = file_size // BATCH_SIZE
    # num_batches = 4

    for batch_idx in range(num_batches):
        start_idx = batch_idx * BATCH_SIZE
        end_idx = (batch_idx + 1) * BATCH_SIZE
        batch_data_pl, batch_label = get_batch(current_data_pl, current_label,
                                               start_idx, end_idx)
        mask_padded = batch_data_pl[:, :, 2] == 0
        feed_dict = {
            ops['pointclouds_pl']: batch_data_pl,
            ops['labels_pl']: batch_label,
            ops['is_training']: is_training,
            ops['mask_pl']: mask_padded.astype(float),
        }
        atts1, atts2, atts3, pred = sess.run(
            [ops['atts1'], ops['atts2'], ops['atts3'], ops['pred']],
            feed_dict=feed_dict)
        if len(y_pred) == 0:
            y_pred = np.squeeze(pred)
        else:
            y_pred = np.concatenate((y_pred, pred), axis=0)

    # Only events that fit into full batches were evaluated, so truncate the
    # stored labels accordingly.
    with h5py.File(os.path.join(H5_OUT, '{0}.h5'.format(FLAGS.name)), "w") as fh5:
        dset = fh5.create_dataset("DNN", data=y_pred)
        dset = fh5.create_dataset("pid", data=current_label[:num_batches * BATCH_SIZE])
def eval_one_epoch(sess, ops):
    is_training = False
    total_correct = 0
    total_seen = 0
    loss_sum = 0
    eval_idxs = np.arange(0, len(EVALUATE_FILES))
    y_val = []

    for fn in range(len(EVALUATE_FILES)):
        current_file = os.path.join(H5_DIR, EVALUATE_FILES[eval_idxs[fn]])
        current_data, current_label, current_global = provider.load_h5(
            current_file, 'class', glob=True)
        current_label = np.squeeze(current_label)

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE

        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx + 1) * BATCH_SIZE
            batch_data, batch_label, batch_global = get_batch(
                current_data, current_label, current_global, start_idx, end_idx)
            feed_dict = {
                ops['pointclouds_pl']: batch_data,
                ops['labels_pl']: batch_label,
                ops['is_training_pl']: is_training,
                ops['global_pl']: batch_global,
            }
            loss, pred, coefs, coefs2 = sess.run(
                [ops['loss'], ops['pred'], ops['coefs'], ops['coefs2']],
                feed_dict=feed_dict)
            pred_val = np.argmax(pred, 1)
            correct = np.sum(pred_val == batch_label)
            total_correct += correct
            total_seen += BATCH_SIZE
            loss_sum += np.mean(loss)
            if len(y_val) == 0:
                y_val = batch_label
                y_coef1 = np.squeeze(np.max(coefs, -1))
                y_coef2 = np.squeeze(np.max(coefs2, -1))
                y_data = batch_data[:, :, :3]
                y_sc = pred[:, 1]
            else:
                y_val = np.concatenate((y_val, batch_label), axis=0)
                y_coef1 = np.concatenate((y_coef1, np.squeeze(np.max(coefs, -1))), axis=0)
                y_coef2 = np.concatenate((y_coef2, np.squeeze(np.max(coefs2, -1))), axis=0)
                y_data = np.concatenate((y_data, batch_data[:, :, :3]), axis=0)
                y_sc = np.concatenate((y_sc, pred[:, 1]), axis=0)

    fpr, tpr, thresholds = metrics.roc_curve(y_val, y_sc, pos_label=1)
    print('AUC: ', metrics.roc_auc_score(y_val, y_sc))

    signal = y_sc[y_val == 1]
    background = y_sc[y_val == 0]
    n, bins, patches = plt.hist([signal, background], 50,
                                color=['m', 'g'], alpha=0.75, range=(0, 1),
                                label=['Signal', 'Background'],
                                histtype='stepfilled')
    plt.grid(True)
    plt.savefig("{0}/output_{1}.pdf".format(FLAGS.plot_path, FLAGS.name), dpi=150)
    print('Saving DNN output histograms at: ',
          "{0}/output_{1}.pdf".format(FLAGS.plot_path, FLAGS.name))

    fig, base = plt.subplots(dpi=150)
    p = base.semilogy(tpr, 1.0 / fpr, color='m')
    bineff30 = np.argmax(tpr > 0.3)
    bineff50 = np.argmax(tpr > 0.5)
    print('1/effB at {0} effS: '.format(tpr[bineff30]), 1.0 / fpr[bineff30])
    print('1/effB at {0} effS: '.format(tpr[bineff50]), 1.0 / fpr[bineff50])
    base.set_xlabel("True Positive Rate")
    base.set_ylabel("1.0/False Positive Rate")
    plt.grid(True)
    plt.savefig("{0}/ROC_{1}.pdf".format(FLAGS.plot_path, FLAGS.name))

    total_loss = loss_sum * 1.0 / float(num_batches)
    print('The total accuracy is {0}'.format(total_correct / float(total_seen)))

    npyname = 'GapNet_{0}.npy'.format(FLAGS.name)
    np.save(npyname, y_sc)
    with h5py.File('{0}.h5'.format(FLAGS.name), "w") as fh5:
        dset = fh5.create_dataset("pid", data=y_val)
        dset = fh5.create_dataset("DNN", data=y_sc)
        dset = fh5.create_dataset("coef1", data=y_coef1)
        dset = fh5.create_dataset("coef2", data=y_coef2)
        dset = fh5.create_dataset("data", data=y_data)
import tensorflow as tf
import numpy as np
import sys
import os
import math
import provider
import keras
from keras.models import load_model
from keras.utils import plot_model

# read file
# TEST_FILES = provider.getDataFiles('/Users/wangxue/gitpro/DL/pointcloud/pointtest/modelnet40_ply_hdf5_2048/test_files.txt')

# load model
model1 = load_model('pointtest/model/modelK11.h5')

# load test data
predict_data, predict_label = provider.load_h5(
    'pointtest/modelnet40_ply_hdf5_2048/ply_data_test1.h5')
predict_data = predict_data[:, 0:2048, :]
# predict_data, predict_label, _ = provider.shuffle_data(test_data, np.squeeze(test_label))
predict_data = predict_data[:, :, :, np.newaxis]
predict_label = np.squeeze(predict_label)
predict_label = keras.utils.to_categorical(predict_label, num_classes=40)

pre = model1.predict(predict_data, batch_size=32, verbose=1)
print(pre)
print("----\n", predict_label)

# calculate the prediction accuracy
max_probability = 0.0
index1 = 0
index2 = 0
accuracy = 0
pre_objects = predict_data.shape[0]
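# The script breaks off before the accuracy loop. A sketch of how the
# comparison presumably continues, taking the argmax over the one-hot labels
# and the predicted probabilities; variable names beyond those above are
# hypothetical.
pred_classes = np.argmax(pre, axis=1)
true_classes = np.argmax(predict_label, axis=1)
accuracy = np.mean(pred_classes == true_classes)
print('accuracy = {:.4f} over {} objects'.format(accuracy, pre_objects))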
os.system('cp %s %s' % (MODEL_FILE, LOG_DIR))  # bkp of model def
os.system('cp train_ours_triplet.py %s' % (LOG_DIR))  # bkp of train procedure
LOG_FOUT = open(os.path.join(LOG_DIR, 'log_train.txt'), 'w')
LOG_FOUT.write(str(FLAGS) + '\n')

BN_INIT_DECAY = 0.5
BN_DECAY_DECAY_RATE = 0.5
BN_DECAY_DECAY_STEP = float(DECAY_STEP)
BN_DECAY_CLIP = 0.99

HOSTNAME = socket.gethostname()

OBJ_CAT = FLAGS.category
TRAIN_FILE = '../candidate_generation/train_' + OBJ_CAT + '.h5'
TEST_FILE = '../candidate_generation/test_' + OBJ_CAT + '.h5'
TRAIN_DATA = provider.load_h5(TRAIN_FILE)
TEST_DATA = provider.load_h5(TEST_FILE)

TRAIN_CANDIDATES_FILE = 'generate_deformed_candidates/arap_triplet_train_' + OBJ_CAT + '.pickle'
pickle_in = open(TRAIN_CANDIDATES_FILE, "rb")
TRAIN_DICT = pickle.load(pickle_in)

# TEST_CANDIDATES_FILE = FLAGS.test_candidates_file
# pickle_in = open(TEST_CANDIDATES_FILE, "rb")
# TEST_DICT = pickle.load(pickle_in)

OUTPUT_DIM = FLAGS.output_dim

np.random.seed(0)
def train_one_epoch(sess, ops, train_writer, is_full_training):
    """ ops: dict mapping from string to tf ops """
    is_training = True
    train_idxs = np.arange(0, len(TRAIN_FILES))
    acc = loss_sum = 0
    y_pool = []

    for fn in range(len(TRAIN_FILES)):
        # log_string('----' + str(fn) + '-----')
        current_file = os.path.join(H5_DIR, TRAIN_FILES[train_idxs[fn]])
        if RD:
            current_data, current_cluster, current_label = provider.load_h5_data_label_seg(
                current_file)
        else:
            current_data, current_label = provider.load_h5(current_file, 'seg')
        adds = provider.load_add(current_file, ['global'])
        if NUM_GLOB < adds['global'].shape[1]:
            log_string("Using fewer global variables than available")
            adds['global'] = adds['global'][:, :NUM_GLOB]
        current_label = np.squeeze(current_label)

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE
        if FLAGS.nbatches > 0:
            num_batches = FLAGS.nbatches
        log_string(str(datetime.now()))

        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx + 1) * BATCH_SIZE
            batch_data, batch_label, batch_global = get_batch(
                current_data, current_label, adds['global'], start_idx, end_idx)
            cur_batch_size = end_idx - start_idx
            feed_dict = {
                ops['pointclouds_pl']: batch_data,
                ops['labels_pl']: batch_label,
                ops['global_pl']: batch_global,
                ops['is_training_pl']: is_training,
                ops['alpha']: 10 * (EPOCH_CNT - MAX_PRETRAIN + 1),
            }
            if is_full_training:
                summary, step, _, loss_val, pred_val, max_pool, dist = sess.run(
                    [ops['merged'], ops['step'], ops['train_op_full'],
                     ops['kmeans_loss'], ops['pred'], ops['max_pool'],
                     ops['stack_dist']],
                    feed_dict=feed_dict)
                # Assign each sample to its closest cluster centre.
                cluster_assign = np.zeros((cur_batch_size), dtype=int)
                for i in range(cur_batch_size):
                    index_closest_cluster = np.argmin(dist[:, i])
                    cluster_assign[i] = index_closest_cluster
                if RD:
                    batch_cluster = current_cluster[start_idx:end_idx]
                    if batch_cluster.size == cluster_assign.size:
                        acc += cluster_acc(batch_cluster, cluster_assign)
            else:
                summary, step, _, loss_val, pred_val, max_pool = sess.run(
                    [ops['merged'], ops['step'], ops['train_op'],
                     ops['classify_loss'], ops['pred'], ops['max_pool']],
                    feed_dict=feed_dict)

            loss_sum += np.mean(loss_val)
            if len(y_pool) == 0:
                y_pool = np.squeeze(max_pool)
            else:
                y_pool = np.concatenate((y_pool, np.squeeze(max_pool)), axis=0)
            train_writer.add_summary(summary, step)

    log_string('mean loss: %f' % (loss_sum / float(num_batches)))
    log_string('train clustering accuracy: %f' % (acc / float(num_batches)))
    return y_pool
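# cluster_acc is called above and in the evaluation functions below but is not
# part of this section. A sketch of the usual DEC-style definition (an
# assumption, not necessarily the author's exact version): match cluster
# indices to true labels with the Hungarian algorithm, then score agreement.
from scipy.optimize import linear_sum_assignment


def cluster_acc(y_true, y_pred):
    """Best-match clustering accuracy (assumed DEC-style implementation)."""
    y_true = y_true.astype(int)
    y_pred = y_pred.astype(int)
    n_clusters = max(y_pred.max(), y_true.max()) + 1
    # Contingency matrix: w[i, j] counts samples in cluster i with true label j.
    w = np.zeros((n_clusters, n_clusters), dtype=int)
    for i in range(y_pred.size):
        w[y_pred[i], y_true[i]] += 1
    # Hungarian algorithm maximizes total agreement (minimizes w.max() - w).
    row_ind, col_ind = linear_sum_assignment(w.max() - w)
    return w[row_ind, col_ind].sum() / y_pred.size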
def eval_one_epoch(sess, ops, test_writer, is_full_training):
    """ ops: dict mapping from string to tf ops """
    global EPOCH_CNT
    is_training = False
    test_idxs = np.arange(0, len(TEST_FILES))
    # Test on all data: last batch might be smaller than BATCH_SIZE
    loss_sum = acc = 0

    for fn in range(len(TEST_FILES)):
        # log_string('----' + str(fn) + '-----')
        current_file = os.path.join(H5_DIR, TEST_FILES[test_idxs[fn]])
        if RD:
            current_data, current_cluster, current_label = provider.load_h5_data_label_seg(
                current_file)
        else:
            current_data, current_label = provider.load_h5(current_file, 'seg')
        adds = provider.load_add(current_file, ['global'])
        if NUM_GLOB < adds['global'].shape[1]:
            log_string("Using fewer global variables than available")
            adds['global'] = adds['global'][:, :NUM_GLOB]
        current_label = np.squeeze(current_label)

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE

        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx + 1) * BATCH_SIZE
            batch_data, batch_label, batch_global = get_batch(
                current_data, current_label, adds['global'], start_idx, end_idx)
            cur_batch_size = end_idx - start_idx
            feed_dict = {
                ops['pointclouds_pl']: batch_data,
                ops['is_training_pl']: is_training,
                ops['global_pl']: batch_global,
                ops['labels_pl']: batch_label,
                ops['alpha']: 10 * (EPOCH_CNT - MAX_PRETRAIN + 1),
            }
            if is_full_training:
                summary, step, loss_val, pred_val, max_pool, dist = sess.run(
                    [ops['merged'], ops['step'], ops['kmeans_loss'],
                     ops['pred'], ops['max_pool'], ops['stack_dist']],
                    feed_dict=feed_dict)
                cluster_assign = np.zeros((cur_batch_size), dtype=int)
                for i in range(cur_batch_size):
                    index_closest_cluster = np.argmin(dist[:, i])
                    cluster_assign[i] = index_closest_cluster
                if RD:
                    batch_cluster = current_cluster[start_idx:end_idx]
                    if batch_cluster.size == cluster_assign.size:
                        acc += cluster_acc(batch_cluster, cluster_assign)
            else:
                summary, step, loss_val, pred_val, max_pool = sess.run(
                    [ops['merged'], ops['step'], ops['classify_loss'],
                     ops['pred'], ops['max_pool']],
                    feed_dict=feed_dict)
            test_writer.add_summary(summary, step)
            loss_sum += np.mean(loss_val)

    total_loss = loss_sum * 1.0 / float(num_batches)
    log_string('mean loss: %f' % (total_loss))
    log_string('testing clustering accuracy: %f' % (acc / float(num_batches)))
    EPOCH_CNT += 1
    # NOTE: unlike the classification variants, this function never accumulates
    # total_correct/total_seen, so the original `FLAGS.min == 'acc'` branch
    # raised a NameError; always return the loss here.
    return total_loss
def eval_one_epoch(sess, ops):
    is_training = False
    eval_idxs = np.arange(0, len(EVALUATE_FILES))
    y_assign = []
    y_glob = []
    acc = 0

    for fn in range(len(EVALUATE_FILES)):
        current_file = os.path.join(H5_DIR, EVALUATE_FILES[eval_idxs[fn]])
        if RD:
            current_data, current_cluster, current_label = provider.load_h5_data_label_seg(
                current_file)
        else:
            current_data, current_label = provider.load_h5(current_file, 'seg')
        adds = provider.load_add(current_file, ['global', 'masses'])
        if NUM_GLOB < adds['global'].shape[1]:
            print("Using fewer global variables than available")
            current_glob = adds['global'][:, :NUM_GLOB]
        else:
            current_glob = adds['global']
        current_label = np.squeeze(current_label)

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE

        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx + 1) * BATCH_SIZE
            batch_data, batch_label, batch_global = get_batch(
                current_data, current_label, current_glob, start_idx, end_idx)
            cur_batch_size = end_idx - start_idx
            feed_dict = {
                ops['pointclouds_pl']: batch_data,
                ops['global_pl']: batch_global,
                ops['labels_pl']: batch_label,
                ops['alpha']: 1,  # no impact during evaluation
                ops['is_training_pl']: is_training,
            }
            dist, mu, max_pool = sess.run(
                [ops['stack_dist'], ops['mu'], ops['max_pool']],
                feed_dict=feed_dict)

            cluster_assign = np.zeros((cur_batch_size), dtype=int)
            if RD:
                batch_cluster = current_cluster[start_idx:end_idx]
            for i in range(cur_batch_size):
                index_closest_cluster = np.argmin(dist[:, i])
                cluster_assign[i] = index_closest_cluster
            if RD:
                acc += cluster_acc(batch_cluster, cluster_assign)

            if len(y_assign) == 0:
                if RD:
                    y_val = batch_cluster
                y_assign = cluster_assign
                y_pool = np.squeeze(max_pool)
            else:
                y_assign = np.concatenate((y_assign, cluster_assign), axis=0)
                y_pool = np.concatenate((y_pool, np.squeeze(max_pool)), axis=0)
                if RD:
                    y_val = np.concatenate((y_val, batch_cluster), axis=0)

        if len(y_glob) == 0:
            y_glob = adds['global'][:num_batches * BATCH_SIZE]
            y_mass = adds['masses'][:num_batches * BATCH_SIZE]
        else:
            y_glob = np.concatenate(
                (y_glob, adds['global'][:num_batches * BATCH_SIZE]), axis=0)
            y_mass = np.concatenate(
                (y_mass, adds['masses'][:num_batches * BATCH_SIZE]), axis=0)

    with h5py.File(os.path.join(H5_OUT, '{0}.h5'.format(FLAGS.name)), "w") as fh5:
        if RD:
            dset = fh5.create_dataset("label", data=y_val)
        dset = fh5.create_dataset("pid", data=y_assign)
        dset = fh5.create_dataset("max_pool", data=y_pool)
        dset = fh5.create_dataset("global", data=y_glob)
        dset = fh5.create_dataset("masses", data=y_mass)
def eval_one_epoch(sess, ops):
    is_training = False
    total_correct = total_correct_ones = total_seen = total_seen_ones = loss_sum = 0
    eval_idxs = np.arange(0, len(EVALUATE_FILES))
    y_val = []

    for fn in range(len(EVALUATE_FILES)):
        current_file = os.path.join(H5_DIR, EVALUATE_FILES[eval_idxs[fn]])
        current_data, current_label, current_global = provider.load_h5(
            current_file, 'seg', glob=True)
        current_label = np.squeeze(current_label)

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE

        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx + 1) * BATCH_SIZE
            batch_data, batch_label, batch_global = get_batch(
                current_data, current_label, current_global, start_idx, end_idx)
            feed_dict = {
                ops['pointclouds_pl']: batch_data,
                ops['labels_pl']: batch_label,
                ops['is_training_pl']: is_training,
                ops['global_pl']: batch_global,
            }
            loss, pred, coefs, coefs2 = sess.run(
                [ops['loss'], ops['pred'], ops['coefs'], ops['coefs2']],
                feed_dict=feed_dict)
            pred_val = np.argmax(pred, 2)
            correct = np.sum(pred_val == batch_label)
            correct_ones = np.sum(pred_val * batch_label)
            total_correct += correct
            total_correct_ones += correct_ones
            total_seen += BATCH_SIZE * NUM_POINT
            total_seen_ones += np.sum(batch_label)
            loss_sum += np.mean(loss)

            if len(y_val) == 0:
                y_val = batch_label
                y_data = batch_data[:, :, :]
                y_glob = batch_global
                y_sc = pred[:, :, 1]
            else:
                y_val = np.concatenate((y_val, batch_label), axis=0)
                y_data = np.concatenate((y_data, batch_data[:, :, :]), axis=0)
                y_glob = np.concatenate((y_glob, batch_global), axis=0)
                y_sc = np.concatenate((y_sc, pred[:, :, 1]), axis=0)

    total_loss = loss_sum * 1.0 / float(num_batches)
    print('The total accuracy is {0}'.format(total_correct / float(total_seen)))
    print('The signal accuracy is {0}'.format(total_correct_ones /
                                              float(total_seen_ones)))

    with h5py.File('{0}.h5'.format(FLAGS.name), "w") as fh5:
        dset = fh5.create_dataset("pid", data=y_val)
        dset = fh5.create_dataset("DNN", data=y_sc)
        dset = fh5.create_dataset("global", data=y_glob)
        dset = fh5.create_dataset("data", data=y_data)
def load_train_data(self):
    print("**** Loading dataset....................")
    train_files = provider.get_data_files(
        os.path.join(self.data_config.datadir_root,
                     self.data_config.dataset_name, 'train_files.txt'))
    test_files = provider.get_data_files(
        os.path.join(self.data_config.datadir_root,
                     self.data_config.dataset_name, 'test_files.txt'))
    self.label_map = provider.get_label_map(
        os.path.join(self.data_config.datadir_root,
                     self.data_config.dataset_name, 'label_map.yaml'))

    train_file_idxs = np.arange(0, len(train_files))
    np.random.shuffle(train_file_idxs)

    if self.is_train:
        pointcloud_data = []
        labels = []
        for fn in range(len(train_files)):
            print(train_files[train_file_idxs[fn]])
            if ".h5" in train_files[train_file_idxs[fn]]:
                pointcloud, image, mask_rgb, feature, label = provider.load_h5(
                    train_files[train_file_idxs[fn]],
                    cloud_color=self.model_config.pointcloud_color,
                    load_feature=False)
            elif ".pgz" in train_files[train_file_idxs[fn]]:
                pointcloud, label = provider.load_pickle_file_with_label(
                    train_files[train_file_idxs[fn]],
                    compressed=True,
                    cloud_color=self.model_config.pointcloud_color)
            pointcloud, label, idx = provider.shuffle_data(
                pointcloud, np.squeeze(label))
            label = np.squeeze(label)
            pointcloud_data.extend(pointcloud)
            labels.extend(label)
        print('**** Train dataset loaded....................')
        self.train_pointcloud_data = np.asarray(pointcloud_data)
        self.train_labels = np.asarray(labels)

    print("**** Loading test dataset....................")
    test_pointcloud_data = []
    test_labels = []
    for fn in range(len(test_files)):
        if ".h5" in test_files[fn]:
            pointcloud, image, mask_rgb, feature, label = provider.load_h5(
                test_files[fn],
                cloud_color=self.model_config.pointcloud_color,
                load_feature=False)
        elif ".pgz" in test_files[fn]:
            pointcloud, label = provider.load_pickle_file_with_label(
                test_files[fn],
                compressed=True,
                cloud_color=self.model_config.pointcloud_color)
        label = np.squeeze(label)
        test_pointcloud_data.extend(pointcloud)
        test_labels.extend(label)

    test_pointcloud_data = np.asarray(test_pointcloud_data)
    test_labels = np.asarray(test_labels)
    test_pointcloud_data, test_labels, idx = provider.shuffle_data(
        test_pointcloud_data, test_labels)
    self.test_data = {}
    self.test_data['pointcloud_data'] = test_pointcloud_data
    self.test_data['labels'] = test_labels
def eval_one_epoch(sess, ops, test_writer):
    """ ops: dict mapping from string to tf ops """
    global EPOCH_CNT
    is_training = False
    test_idxs = np.arange(0, len(TEST_FILES))
    # Test on all data: last batch might be smaller than BATCH_SIZE
    total_correct = total_correct_ones = total_seen = total_seen_ones = loss_sum = total_sig = 0

    for fn in range(len(TEST_FILES)):
        # log_string('----' + str(fn) + '-----')
        current_file = os.path.join(H5_DIR, TEST_FILES[test_idxs[fn]])
        current_data, current_label, global_pl = provider.load_h5(
            current_file, 'seg', glob=True)
        current_data, current_label, current_global, _ = provider.shuffle_data(
            current_data, np.squeeze(current_label), global_pl=global_pl)
        current_label = np.squeeze(current_label)

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE

        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx + 1) * BATCH_SIZE
            batch_data, batch_label, batch_global = get_batch(
                current_data, current_label, current_global, start_idx, end_idx)
            feed_dict = {
                ops['pointclouds_pl']: batch_data,
                ops['labels_pl']: batch_label,
                ops['is_training_pl']: is_training,
                ops['global_pl']: batch_global,
            }
            summary, step, loss_val, pred_val, coefs = sess.run(
                [ops['merged'], ops['step'], ops['loss'],
                 ops['pred'], ops['coefs']],
                feed_dict=feed_dict)
            test_writer.add_summary(summary, step)
            pred_val = np.argmax(pred_val, 2)
            correct = np.sum(pred_val == batch_label)
            correct_ones = np.sum(pred_val * batch_label)
            total_sig += np.sum(batch_label)
            total_correct_ones += correct_ones
            total_correct += correct
            total_seen_ones += np.sum(batch_label)
            total_seen += BATCH_SIZE * NUM_POINT
            loss_sum += np.mean(loss_val)

    total_loss = loss_sum * 1.0 / float(num_batches)
    log_string('mean loss: %f' % (total_loss))
    log_string('accuracy: %f' % (total_correct / float(total_seen)))
    log_string('The signal accuracy is {0}'.format(total_correct_ones /
                                                   float(total_seen_ones)))
    EPOCH_CNT += 1
    if FLAGS.min == 'acc':
        return total_correct / float(total_seen)
    else:
        return total_loss
LOG_DIR = FLAGS.log_dir
if not os.path.exists(LOG_DIR):
    os.mkdir(LOG_DIR)
os.system('cp model.py %s' % (LOG_DIR))
os.system('cp train.py %s' % (LOG_DIR))
LOG_FOUT = open(os.path.join(LOG_DIR, 'log_train.txt'), 'w')
LOG_FOUT.write(str(FLAGS) + '\n')

MAX_NUM_POINT = 16384
NUM_CLASSES = 13

BN_INIT_DECAY = 0.5
BN_DECAY_DECAY_RATE = 0.5
BN_DECAY_DECAY_STEP = float(DECAY_STEP)
BN_DECAY_CLIP = 0.99

train_data, train_label = provider.load_h5(
    '/home/chencan/data/KITTI/object/training/hdf5/train_data.h5')
train_label = train_label.reshape((-1, NUM_POINT))


def log_string(out_str):
    LOG_FOUT.write(out_str + '\n')
    LOG_FOUT.flush()
    print(out_str)


def get_learning_rate(batch):
    learning_rate = tf.train.exponential_decay(
        BASE_LEARNING_RATE,  # Base learning rate.
        batch * BATCH_SIZE,  # Current index into the dataset.
        DECAY_STEP,          # Decay step.
        DECAY_RATE,          # Decay rate.
def eval_one_epoch(sess, ops, test_writer):
    """ ops: dict mapping from string to tf ops """
    global EPOCH_CNT
    is_training = False
    test_idxs = np.arange(0, len(TEST_FILES))
    # Test on all data: last batch might be smaller than BATCH_SIZE
    total_correct = 0
    total_seen = 0
    loss_sum = 0
    y_val = []

    for fn in range(len(TEST_FILES)):
        log_string('----' + str(fn) + '-----')
        current_file = os.path.join(H5_DIR, TEST_FILES[test_idxs[fn]])
        current_data, current_label, current_global = provider.load_h5(
            current_file, 'class', glob=True)
        current_data, current_label, current_global, _ = provider.shuffle_data(
            current_data, np.squeeze(current_label), global_pl=current_global)
        current_label = np.squeeze(current_label)

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE

        log_string(str(datetime.now()))
        log_string('---- EPOCH %03d EVALUATION ----' % (EPOCH_CNT))

        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx + 1) * BATCH_SIZE
            batch_data, batch_label, batch_global = get_batch(
                current_data, current_label, current_global, start_idx, end_idx)
            feed_dict = {
                ops['pointclouds_pl']: batch_data,
                ops['labels_pl']: batch_label,
                ops['is_training_pl']: is_training,
                ops['global_pl']: batch_global,
            }
            # Time the first batch to report evaluation speed.
            if batch_idx == 0:
                start_time = time.time()
            summary, step, loss_val, pred_val, coefs, coefs2, adj = sess.run(
                [ops['merged'], ops['step'], ops['loss'], ops['pred'],
                 ops['coefs'], ops['coefs2'], ops['adj']],
                feed_dict=feed_dict)
            if batch_idx == 0:
                duration = time.time() - start_time
                log_string("Eval time: " + str(duration))
            test_writer.add_summary(summary, step)

            pred = pred_val
            pred_val = np.argmax(pred_val, 1)
            correct = np.sum(pred_val == batch_label)
            total_correct += correct
            total_seen += BATCH_SIZE
            loss_sum += np.mean(loss_val)
            if len(y_val) == 0:
                y_val = batch_label
                y_sc = pred[:, 1]
            else:
                y_val = np.concatenate((y_val, batch_label), axis=0)
                y_sc = np.concatenate((y_sc, pred[:, 1]), axis=0)

    fpr, tpr, thresholds = metrics.roc_curve(y_val, y_sc, pos_label=1)
    bineff30 = np.argmax(tpr > 0.3)
    log_string('1/effB at {0} effS: {1}'.format(tpr[bineff30], 1.0 / fpr[bineff30]))

    total_loss = loss_sum * 1.0 / float(num_batches)
    log_string('mean loss: %f' % (total_loss))
    log_string('accuracy: %f' % (total_correct / float(total_seen)))
    EPOCH_CNT += 1
    if FLAGS.min == 'acc':
        return total_correct / float(total_seen)
    else:
        return total_loss
FLAGS = parser.parse_args()

BATCH_SIZE = FLAGS.batch_size
NUM_POINT = FLAGS.num_point
MODEL_PATH = FLAGS.model_path
GPU_INDEX = FLAGS.gpu
DUMP_DIR = FLAGS.dump_dir
if not os.path.exists(DUMP_DIR):
    os.mkdir(DUMP_DIR)
NUM_CLASSES = 4

LOG_FOUT = open(os.path.join(DUMP_DIR, 'log_evaluate.txt'), 'w')
LOG_FOUT.write(str(FLAGS) + '\n')

test_data, test_label = provider.load_h5('/home/chencan/dataset/kitti/test_data.h5')
test_label = test_label.reshape((-1, NUM_POINT))


def log_string(out_str):
    LOG_FOUT.write(out_str + '\n')
    LOG_FOUT.flush()
    print(out_str)


def evaluate():
    with tf.device('/gpu:' + str(GPU_INDEX)):
        pointclouds_pl, labels_pl = placeholder_inputs(BATCH_SIZE, NUM_POINT)
        is_training_pl = tf.placeholder(tf.bool, shape=())
def eval_one_epoch(sess, ops, test_writer):
    """ ops: dict mapping from string to tf ops """
    global EPOCH_CNT
    is_training = False
    loss_sum = 0
    y_source = []

    current_data_pl, current_label = provider.load_h5(TEST_FILE, 'class')
    if multi:
        current_label = np.argmax(current_label, axis=-1)
    current_data_pl, current_label, _ = provider.shuffle_data(
        current_data_pl, np.squeeze(current_label))

    file_size = current_data_pl.shape[0]
    num_batches = file_size // BATCH_SIZE
    # num_batches = 4

    log_string(str(datetime.now()))
    log_string('---- EPOCH %03d EVALUATION ----' % (EPOCH_CNT))

    for batch_idx in range(num_batches):
        start_idx = batch_idx * BATCH_SIZE
        end_idx = (batch_idx + 1) * BATCH_SIZE
        batch_data_pl, batch_label = get_batch(current_data_pl, current_label,
                                               start_idx, end_idx)
        mask_padded = batch_data_pl[:, :, 2] == 0
        feed_dict = {
            ops['pointclouds_pl']: batch_data_pl,
            ops['labels_pl']: batch_label,
            ops['is_training']: is_training,
            ops['mask_pl']: mask_padded.astype(float),
        }
        if batch_idx == 0:
            start_time = time.time()
        summary, step, loss, pred, lr = sess.run(
            [ops['merged'], ops['step'], ops['loss'], ops['pred'],
             ops['learning_rate']],
            feed_dict=feed_dict)
        if batch_idx == 0:
            duration = time.time() - start_time
            log_string("Eval time: " + str(duration))
            log_string("Learning rate: " + str(lr))
        test_writer.add_summary(summary, step)
        loss_sum += np.mean(loss)
        if len(y_source) == 0:
            y_source = np.squeeze(pred)
        else:
            y_source = np.concatenate((y_source, np.squeeze(pred)), axis=0)

    if multi:
        name_convert = {
            0: 'Gluon',
            1: 'Quark',
            2: 'Z',
            3: 'W',
            4: 'Top',
        }
        label = current_label[:num_batches * BATCH_SIZE]
        for isample in np.unique(label):
            fpr, tpr, _ = metrics.roc_curve(label == isample,
                                            y_source[:, isample], pos_label=1)
            log_string("Class: {}, AUC: {}".format(name_convert[isample],
                                                   metrics.auc(fpr, tpr)))
            bineff = np.argmax(fpr > 0.1)
            log_string('SOURCE: effS at {0} effB = {1}'.format(tpr[bineff],
                                                               fpr[bineff]))
        log_string('mean loss: %f' % (loss_sum * 1.0 / float(num_batches)))
    else:
        fpr, tpr, _ = metrics.roc_curve(current_label[:num_batches * BATCH_SIZE],
                                        y_source[:, 1], pos_label=1)
        log_string("AUC: {}".format(metrics.auc(fpr, tpr)))
        bineff = np.argmax(tpr > 0.3)
        log_string('SOURCE: 1/effB at {0} effS = {1}'.format(tpr[bineff],
                                                             1.0 / fpr[bineff]))
        log_string('mean loss: %f' % (loss_sum * 1.0 / float(num_batches)))

    EPOCH_CNT += 1
    return loss_sum * 1.0 / float(num_batches)
def eval_one_epoch(sess, ops):
    is_training = False
    total_correct = total_sig = total_correct_ones = total_seen = total_seen_ones = loss_sum = 0
    eval_idxs = np.arange(0, len(EVALUATE_FILES))
    y_pred = []

    for fn in range(len(EVALUATE_FILES)):
        current_file = os.path.join(H5_DIR, EVALUATE_FILES[eval_idxs[fn]])
        current_data, current_label = provider.load_h5(current_file, 'seg')
        full_data = current_data
        if current_data.shape[2] > NFEATURES:
            print('puppi not used')
            current_data = current_data[:, :, :NFEATURES]
        if current_data.shape[1] > NUM_POINT:
            print('Using fewer points')
            current_data = current_data[:, :NUM_POINT]
            current_label = current_label[:, :NUM_POINT]

        add_list = [
            'PFNoPU',
            'puppiPU',
            'chs',
            'NPU',
            'CHS_MET',
            'PUPPI_MET',
            # 'puppiNoPU',
        ]
        adds = provider.load_add(current_file, add_list)
        if not FLAGS.is_data:
            current_truth = adds['PFNoPU']
            current_truth = preprocessing(current_data, current_truth)
        else:
            add_list.append('nLeptons')
            current_truth = np.zeros((current_data.shape))
        current_label = np.squeeze(current_label)

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE

        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx + 1) * BATCH_SIZE
            batch_data, batch_label, batch_truth = get_batch(
                current_data, current_label, current_truth, start_idx, end_idx)
            feed_dict = {
                ops['pointclouds_pl']: batch_data,
                ops['truth_pl']: batch_truth,
                ops['labels_pl']: batch_label,
                ops['is_training_pl']: is_training,
            }
            loss, pred = sess.run([ops['loss'], ops['pred']], feed_dict=feed_dict)
            pred_val = np.argmax(pred, 2)
            correct_ones = pred_val * batch_label
            total_sig += np.sum(batch_label == 2)
            total_correct_ones += np.sum(correct_ones == 4)
            loss_sum += np.mean(loss)

            if len(y_pred) == 0:
                y_pred = pred[:, :, 2]
                y_data = full_data[start_idx:end_idx]
                y_lab = batch_label
                y_add = {}
                for add in adds:
                    y_add[add] = adds[add][start_idx:end_idx]
            else:
                y_pred = np.concatenate((y_pred, pred[:, :, 2]), axis=0)
                y_data = np.concatenate((y_data, full_data[start_idx:end_idx]),
                                        axis=0)
                y_lab = np.concatenate((y_lab, batch_label), axis=0)
                for add in adds:
                    y_add[add] = np.concatenate(
                        (y_add[add], adds[add][start_idx:end_idx]), axis=0)

    if not FLAGS.is_data:
        print('The signal accuracy is {0}'.format(total_correct_ones /
                                                  float(total_sig)))

    flat_pred = y_pred.flatten()
    flat_lab = y_lab.flatten()
    flat_lab = flat_lab == 2
    results = metrics.roc_curve(flat_lab, flat_pred)

    threshs = [0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.90, 0.95]
    with open(os.path.join(MODEL_PATH, 'cut_eff.txt'), 'w') as f:
        for thresh in threshs:
            bin_idx = np.argmax(results[1] > thresh)  # renamed to avoid shadowing built-in bin()
            cut = results[2][bin_idx]
            f.write('eff: {}, fpr: {}, cut: {} \n'.format(
                results[1][bin_idx], results[0][bin_idx], cut))

    with h5py.File(os.path.join(H5_OUT, '{0}.h5'.format(FLAGS.name)), "w") as fh5:
        dset = fh5.create_dataset("DNN", data=y_pred)
        dset = fh5.create_dataset("data", data=y_data)
        dset = fh5.create_dataset("pid", data=y_lab)
        for add in adds:
            dset = fh5.create_dataset(add, data=y_add[add])
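# Several of the evaluation functions above persist their outputs to HDF5.
# For reference, a short sketch of reading one of these files back; the
# dataset names come from the writes above, and 'example.h5' stands in for
# whatever FLAGS.name produced at evaluation time.
import h5py

with h5py.File('example.h5', 'r') as fh5:
    scores = fh5['DNN'][:]  # network scores, per event or per point
    labels = fh5['pid'][:]  # matching truth labels
print(scores.shape, labels.shape)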