def train_network(X, Y, epochs=1, train=True, filename=None, load_model=False,
                  finetune=False, task_id=None, independent=False):
    """Load or build the module-level network, optionally fit it, then save it.

    Rebinds the module globals ``model`` and ``history`` and, when training
    runs, appends the label width to ``task_dims``.

    Args:
        X: Raw inputs; passed through ``transform_data`` unless a saved model
            is loaded without fine-tuning.
        Y: Raw labels, transformed together with ``X``.
        epochs: Epoch count forwarded to ``fit``.
        train: Fit the model when true.
        filename: ``(architecture_json_path, weights_path)`` pair. Must be
            given when ``load_model`` is true; when ``None`` nothing is saved.
        load_model: Restore architecture and weights from ``filename`` first.
        finetune: Together with ``load_model``, reuse the loaded layer named
            ``'dense_1'`` as ``nn.conv`` and add a new task without
            constructing a fresh model.
        task_id: Forwarded to ``transform_data``.
        independent: Forwarded to ``nn.add_new_task``.
    """
    # NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    # source; confirm the `finetune` / `not finetune` branches against history.
    global model, history

    if load_model:
        print('Loading model')
        with open(filename[0], 'r') as architecture_file:
            model = model_from_json(architecture_file.read())
        model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
        model.load_weights(filename[1])
        print('Loaded(probably)')
        if finetune:
            print(model.layers[0].__dict__)
            for layer in model.layers:
                if layer.name == 'dense_1':
                    nn.conv = layer

    # Kept from the original; presumably a fail-fast check that h5py is
    # installed before weights are written -- TODO confirm.
    import h5py

    if not load_model or finetune:
        X, Y, _ = transform_data(X, Y, task_id)
        print('Labels shape: ' + str(Y.shape))
        print('Labels look like this : ')
        print(Y[0])
        if not finetune:
            nn.construct_model(X[0].shape)
        nn.add_new_task(len(Y[0]), independent)
        model = nn.get_model().model

    if train:
        print('Training started')
        # Persist the architecture before the fit so it survives an
        # interrupted run; skipped when no output path was supplied.
        if filename is not None:
            with open(filename[0], 'w') as architecture_file:
                architecture_file.write(model.to_json())
        from keras.utils.visualize_util import plot
        plot(model, to_file='model.png')
        print('Fitting')
        history = nn.get_model().fit(X, Y, epoch=epochs)
        task_dims.append(Y.shape[1])
        print('Training end')

    if filename is not None:
        with open(filename[0], 'w') as architecture_file:
            architecture_file.write(model.to_json())
        model.save_weights(filename[1], overwrite=True)
def process_train(self):
    """Run one pass over the training batches and checkpoint the weights.

    Builds a fresh default graph with a 784-wide input placeholder and a
    10-wide label placeholder, restores ``self.last_model + self.checkpoint``
    when that checkpoint exists (otherwise initialises the variables), runs
    one Adam step per batch supplied by ``self.data``, and saves the
    checkpoint back to the same path.
    """
    tf.reset_default_graph()
    X = tf.placeholder(shape=[None, 784], dtype=tf.float32, name="X")
    Y = tf.placeholder(shape=[None, 10], dtype=tf.float32, name="Y")
    model = get_model(X, apply_dropout=True)
    loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=model)
    opt = tf.train.AdamOptimizer(0.001).minimize(loss)

    checkpoint_path = self.last_model + self.checkpoint
    # One Saver serves both restore and save; it is created after the
    # optimizer so the optimizer's variables are covered as well.
    saver = tf.train.Saver()

    with tf.Session() as sess:
        if tf.train.checkpoint_exists(checkpoint_path):
            saver.restore(sess, checkpoint_path)
        else:
            tf.global_variables_initializer().run()

        batches = self.data.get_number_of_train_batches(self.batch_size)
        for batch in range(batches):
            x, y = self.data.get_next_train_batch(self.batch_size)
            batch_loss, _ = sess.run([loss, opt], feed_dict={X: x, Y: y})
            print("\rBatch: {}/{} | Loss: {:.5f}".format(
                batch + 1, batches, batch_loss.mean()), end="")

        # NOTE(review): the save is assumed to happen once, after the batch
        # loop; the collapsed source does not show its nesting.
        saver.save(sess, checkpoint_path)
        print("\r")
def evaluate_accuracy(task_id, dificulty, errors=False, outputFile='result.txt', silent=False):
    """Score the model registered for ``(task_id, dificulty)`` on test data.

    Args:
        task_id: Task identifier forwarded to ``read_data``.
        dificulty: Difficulty level (spelling kept for keyword callers).
        errors: Forwarded to ``accuracy``.
        outputFile: Accepted for compatibility; not used in this function.
        silent: Suppress the progress messages when true. The representation
            and the final accuracy are printed regardless.

    Returns:
        The value returned by ``accuracy``.
    """
    def say(message):
        # Progress chatter only; results are printed unconditionally below.
        if not silent:
            print(message)

    say('reading and processing testing data')
    X, Y = read_data(training=False, task_id=task_id, difficulty=dificulty)

    say("Getting representation")
    Y = _classifier.labels_remove_twos(Y)
    representation = _classifier.find_representation(Y)
    print('Representation for accuracy: ', representation)

    say('read')
    say('predicting..')
    task_model = nn.get_model(tasks_encoded[(task_id, dificulty)])
    raw_predicted = task_model.predict(X)
    predicted = _classifier.get_normal_output(raw_predicted, representation)

    acc = accuracy(predicted, Y, raw_predicted, errors)
    print('Accuracy %.4f' % acc)
    return acc
def train(args):
    """Train the model with a summed-MSE loss and checkpoint every epoch.

    Args:
        args: Namespace providing ``optimizer`` (``'sgd'`` or ``'adam'``),
            ``lr``, ``momentum``, ``weight_decay``, ``datasets``, ``batch``,
            ``num_workers``, ``gpus`` and ``epoch``.

    Raises:
        ValueError: If ``args.optimizer`` is neither ``'sgd'`` nor ``'adam'``.

    Side effects:
        Writes ``checkpoints/<epoch>.pkl`` after every epoch.
    """
    import os

    model = get_model(pretrained=True)
    criterion = torch.nn.MSELoss(reduction='sum')

    if args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                    momentum=args.momentum)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                     weight_decay=args.weight_decay)
    else:
        # Previously fell through and failed later with an unbound name.
        raise ValueError("Unsupported optimizer: %r" % (args.optimizer,))

    train_dataset = H_dataset(args.datasets, train=True)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch,
                                               shuffle=True,
                                               num_workers=args.num_workers)
    val_dataset = H_dataset(args.datasets, train=False)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch,
                                             shuffle=False,
                                             num_workers=args.num_workers)

    use_gpu = len(args.gpus) > 0
    if use_gpu:
        model = model.cuda()  # It doesn't support multi-card training now.

    # torch.save does not create missing directories.
    if not os.path.isdir("checkpoints"):
        os.makedirs("checkpoints")

    for epoch in range(args.epoch):
        model.train()
        train_loss = 0
        for step, (image_pairs, labels) in enumerate(train_loader):
            if use_gpu:
                image_pairs = image_pairs.cuda()
                labels = labels.cuda()
            pred = model(image_pairs)
            loss = criterion(pred, labels)
            train_loss += loss.data
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            if step % 50 == 0:
                print("Epoch: %d | Step: %d | Train Loss: %f " %
                      (epoch, step, train_loss / (step + 1)))

        val_loss = 0
        print("Testing----------")
        # Validation must neither build autograd graphs nor update
        # train-mode layer state (dropout / batch-norm statistics).
        model.eval()
        with torch.no_grad():
            for step, (image_pairs, labels) in enumerate(val_loader):
                if use_gpu:
                    image_pairs = image_pairs.cuda()
                    labels = labels.cuda()
                pred = model(image_pairs)
                loss = criterion(pred, labels)
                val_loss += loss.data

        train_loss = train_loss / len(train_loader)
        val_loss = val_loss / len(val_loader)
        print("Epoch: %d | Train Loss: %f | Val Loss: %f " %
              (epoch, train_loss, val_loss))
        torch.save(model.state_dict(), "checkpoints/" + str(epoch) + ".pkl")
def model_statistics_on_task(task_id, difficulty, epochs):
    """Record test accuracy before training and after each single epoch.

    Builds a model for the task, registers its index in ``tasks_encoded``,
    then alternates one-epoch fits with silent accuracy evaluations.

    Args:
        task_id: Task identifier forwarded to the data helpers.
        difficulty: Difficulty level forwarded to the data helpers.
        epochs: Number of one-epoch training rounds.

    Returns:
        List of ``epochs + 1`` accuracies; entry 0 is the untrained model.
    """
    raw_inputs, labels = read_data(task_id=task_id, difficulty=difficulty)
    raw_inputs, labels, _ = transform_data(raw_inputs, labels, task_id)

    nn.construct_model(raw_inputs[0].shape)
    nn.add_new_task(len(labels[0]))
    model = nn.get_model().model
    inputs = nn.get_model().make_input(raw_inputs)
    # The task just added is the last one known to `nn`.
    tasks_encoded[(task_id, difficulty)] = len(nn.tasks) - 1

    acc_history = []
    for epoch in range(epochs + 1):
        print('Epoch #', epoch)
        # Round 0 only measures the untrained baseline.
        if epoch != 0:
            model.fit(inputs, labels, nb_epoch=1)
        current_acc = evaluate_accuracy(task_id, difficulty, silent=True)
        acc_history.append(current_acc)
    return acc_history
def test(args):
    """Report the average corner error of the CNN or of the SIFT baseline.

    Args:
        args: Namespace providing ``mode`` (``'cnn'`` selects the network,
            anything else the SIFT+RANSAC baseline), ``datasets``, ``batch``,
            ``num_workers`` and, for the CNN branch, ``model`` (state-dict
            path) and ``gpus``.

    The result is printed, not returned.
    """
    if args.mode == 'cnn':
        val_dataset = H_dataset(args.datasets, train=False)
        val_loader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=args.batch,
                                                 shuffle=False,
                                                 num_workers=args.num_workers)
        model = get_model(pretrained=True)
        model.load_state_dict(torch.load(args.model))
        use_gpu = len(args.gpus) > 0
        # Keep model and inputs on the same device: the inputs are only moved
        # when GPUs are configured, so the model follows the same switch.
        if use_gpu:
            model.cuda()
        model.eval()

        val_loss = 0
        with torch.no_grad():
            for step, (image_pairs, labels) in enumerate(val_loader):
                if use_gpu:
                    image_pairs = image_pairs.cuda()
                    labels = labels.cuda()
                pred = model(image_pairs)
                val_loss += metric(pred.cpu().data.numpy(),
                                   labels.cpu().data.numpy())
        val_loss = val_loss / len(val_loader)
        print("CNN average corner error: ", val_loss)
    else:
        val_dataset = H_dataset(args.datasets, train=False, norm=False)
        val_loader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=args.batch,
                                                 shuffle=False,
                                                 num_workers=args.num_workers)
        success = 0
        val_loss = 0
        for step, (image_pairs, labels) in enumerate(val_loader):
            # Best effort: a pair SIFT/RANSAC cannot handle is skipped, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            try:
                # NOTE(review): only the first pair of each batch is matched,
                # so this presumably expects batch size 1 -- confirm.
                H = sift_ransac(image_pairs[0].cpu().numpy())
                if H is not None:
                    success += 1
                    # Corners of the 128x128 patch: (0,0), (128,0), (0,128),
                    # (128,128).
                    corners = np.zeros((image_pairs.size(0), 4, 2),
                                       dtype=np.float32)
                    corners[:, 1, 0] = 128
                    corners[:, 2, 1] = 128
                    corners[:, 3, :] = 128
                    corners = corners.reshape(-1, 2)
                    pred = H_transform(corners, H)
                    val_loss += metric(pred, labels.cpu().data.numpy())
            except Exception:
                continue
        # Avoid a ZeroDivisionError when no pair could be matched at all.
        val_loss = val_loss / success if success else float('nan')
        print("SIFT average corner error: ", val_loss)
def predict():
    """Run the edge model on every test model and save the edge maps.

    For each entry of ``TESTING_FILE_LIST`` the ``NUM_VIEWS`` grayscale
    renderings under ``data/rgb/<name>/`` are stacked into one batch, passed
    through the network, squashed with ``sigmoid`` and written to
    ``data/mv-rnn/<name>/MV-RNN-<view>.png``. Progress is logged to
    ``<output_dir>/log.txt``.
    """
    is_training = False
    with tf.device('/gpu:' + str(gpu_to_use)):
        images_ph, edges_ph = placeholder_inputs()
        is_training_ph = tf.placeholder(tf.bool, shape=())
        # simple model
        edge_logits = model.get_model(images_ph, is_training=is_training_ph)
        # NOTE(review): `loss` and `saver` are not used below; kept because
        # building them adds ops to the graph -- confirm they can go.
        loss = model.get_loss(edge_logits, edges_ph)

    saver = tf.train.Saver()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True

    with tf.Session(config=config) as sess:
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)
        # The context manager closes the log even when a prediction fails.
        with open(os.path.join(output_dir, 'log.txt'), 'w') as flog:
            # Restore variables from disk, falling back to fresh weights.
            ckpt_dir = './train_results/trained_models'
            if not load_checkpoint(ckpt_dir, sess):
                sess.run(tf.global_variables_initializer())

            if not os.path.exists('data/mv-rnn'):
                os.makedirs('data/mv-rnn')

            total = len(TESTING_FILE_LIST)
            for position, model_name in enumerate(TESTING_FILE_LIST):
                images = np.zeros((1, NUM_VIEWS, IMAGE_SIZE, IMAGE_SIZE, 1))
                target_dir = 'data/mv-rnn/' + model_name
                if not os.path.exists(target_dir):
                    os.makedirs(target_dir)

                for v in range(NUM_VIEWS):
                    view_tag = str(v).zfill(3)
                    images[0, v, :, :, 0] = np.array(
                        scipy.ndimage.imread(
                            'data/rgb/' + model_name + '/RGB-' + view_tag + '.png',
                            mode='L'),
                        dtype=np.float32)

                edge_logits_val = sess.run(
                    edge_logits,
                    feed_dict={images_ph: images, is_training_ph: is_training})
                edges = sigmoid(edge_logits_val)

                for v in range(NUM_VIEWS):
                    scipy.misc.imsave(
                        target_dir + '/MV-RNN-' + str(v).zfill(3) + '.png',
                        edges[0, v, :, :, 0])

                printout(flog, '[%2d/%2d] model %s' % (position + 1, total, model_name))
                printout(flog, '----------')
def main():
    """Evaluate the pretrained ``resnet152`` network on the test loader.

    Uses the first CUDA device when one is available, otherwise the CPU.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Build model architecture
    model = network.get_model(name="resnet152", pretrained=True)
    model.to(device)

    test_loader = get_data_loader()
    evaluate(model, test_loader, device)
def get_original_output(task_id, difficulty):
    """Predict on the test split and lay each prediction out like ``Y[0]``.

    Args:
        task_id: Task identifier forwarded to ``read_data``.
        difficulty: Difficulty level forwarded to ``read_data``.

    Returns:
        One list per predicted label, shaped like the first test label: a
        position where that template holds ``2`` becomes ``0``, every other
        position takes the next value of the prediction.
    """
    X, Y = read_data(training=False, task_id=task_id, difficulty=difficulty)
    Y = _classifier.labels_remove_twos(Y)
    representation = _classifier.find_representation(Y)
    raw_predicted = nn.get_model(tasks_encoded[(task_id, difficulty)]).predict(X)
    predicted = _classifier.get_normal_output(raw_predicted, representation)

    def transform_single_label(label, how):
        """Spread ``label`` over the positions of ``how`` that are not 2."""
        restored = []
        position = 0
        for marker in how:
            if marker == 2:
                restored.append(0)
            else:
                # NOTE(review): the cursor is assumed to advance only when a
                # value is consumed; the collapsed source is ambiguous here.
                restored.append(label[position])
                position += 1
        # The original returned `how`, discarding the list it had just built.
        return restored

    template = Y[0]
    return [transform_single_label(label, template) for label in predicted]
def CNN_stitch(images, trained_model):
    """Stitch two images using corner offsets predicted by the CNN.

    Both images are centre-cropped to a square whose side is the shorter
    side of the second image, converted to grayscale, resized to 128x128 and
    scaled to [-1, 1]. The network output is read as eight corner offsets,
    scaled back to the crop size and turned into a perspective transform
    that warps the first crop next to the second.

    Args:
        images: Sequence whose first two items are BGR image arrays.
        trained_model: Path to the saved state dict.

    Returns:
        The stitched image.
    """
    imageA = images[0]
    imageB = images[1]

    model = get_model(pretrained=True)
    # Inference runs on the CPU here, so map GPU-saved tensors accordingly.
    model.load_state_dict(torch.load(trained_model, map_location='cpu'))
    model.eval()

    min_w = min(imageB.shape[0], imageB.shape[1])
    scale = min_w / 128.
    half = min_w // 2

    def center_crop(img):
        """Return the central (2 * half) x (2 * half) window of ``img``."""
        mid_row = img.shape[0] // 2
        mid_col = img.shape[1] // 2
        return img[mid_row - half:mid_row + half, mid_col - half:mid_col + half]

    imageA = center_crop(imageA)
    imageB = center_crop(imageB)

    def to_network_plane(img):
        """Grayscale, 128x128, float32, scaled to [-1, 1]."""
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        gray = cv2.resize(gray, (128, 128))
        return (gray.astype(np.float32) - 127.5) / 127.5

    # Channels 0 and 1 carry the two images; channel 2 stays zero.
    x = np.zeros((1, 3, 128, 128), dtype=np.float32)
    x[0, 0, :, :] = to_network_plane(imageA)
    x[0, 1, :, :] = to_network_plane(imageB)
    x = torch.from_numpy(x)

    with torch.no_grad():
        result = model(x).data.numpy()[0] * 128 * scale
    print(result)

    imageCorners = np.array([[0, 0], [min_w, 0], [0, min_w], [min_w, min_w]])
    # result[0:8] holds (dx, dy) for the four corners, in the order above.
    imagePerturbedCorners = imageCorners + np.asarray(result[:8]).reshape(4, 2)
    H = cv2.getPerspectiveTransform(np.float32(imageCorners),
                                    np.float32(imagePerturbedCorners))
    print(H)

    result = cv2.warpPerspective(
        imageA, H, (imageA.shape[1] + imageB.shape[1], imageB.shape[0]))
    result[0:imageB.shape[0], 0:imageB.shape[1]] = imageB
    return result
def process_test(self):
    """Return the classification accuracy over all test batches.

    Rebuilds the graph without dropout, restores the checkpoint at
    ``self.last_model + self.checkpoint`` and compares the arg-max of the
    model output with the arg-max of the labels for every test sample.

    Returns:
        Mean of the per-sample hits (1.0 = all correct); ``nan`` when there
        are no test batches.
    """
    results = []
    tf.reset_default_graph()
    X = tf.placeholder(shape=[None, 784], dtype=tf.float32, name="X")
    Y = tf.placeholder(shape=[None, 10], dtype=tf.float32, name="Y")
    model = get_model(X, apply_dropout=False)
    res = tf.equal(tf.argmax(model, 1), tf.argmax(Y, 1))
    res = tf.cast(res, tf.float32)

    with tf.Session() as sess:
        tf.train.Saver().restore(sess, self.last_model + self.checkpoint)
        batches = self.data.get_number_of_test_batches(self.batch_size)
        for batch in range(batches):
            x, y = self.data.get_next_test_batch(self.batch_size)
            results.append(sess.run(res, feed_dict={X: x, Y: y}))
            print("\rBatch: {}/{}".format(batch + 1, batches), end="")

    if not results:
        return float("nan")
    # Flatten before averaging: a shorter final batch would otherwise make
    # np.array(results) ragged, and every sample should weigh the same.
    return np.concatenate([np.ravel(hits) for hits in results]).mean()
def evaluate_accuracy(task_id, difficulty, errors=False, outputFile='result.txt'):
    """Score the model registered for ``(task_id, difficulty)`` on test data.

    Args:
        task_id: Task identifier forwarded to ``read_data``.
        difficulty: Difficulty level forwarded to ``read_data``.
        errors: Forwarded to ``accuracy``.
        outputFile: Accepted for compatibility; not used in this function.

    Returns:
        The value returned by ``accuracy``.

    Raises:
        Exception: If the task is not present in ``tasks``.
    """
    print('reading and processing testing data')
    X, Y = read_data(training=False, task_id=task_id, difficulty=difficulty)
    print("Getting representation")
    print('read')
    Y = _classifier.labels_remove_twos(Y)
    print('predicting..')
    representation = _classifier.find_representation(Y)

    task_key = (task_id, difficulty)
    if task_key not in tasks:
        raise Exception('Task unknown')
    model = nn.get_model(tasks[task_key])

    raw_predicted = model.predict(X, verbose=1)
    predicted = _classifier.get_normal_output(raw_predicted, representation)
    acc = accuracy(predicted, Y, raw_predicted, errors)
    print('Accuracy %.4f' % acc)
    return acc
def main(argv):
    """Parse the command line, build the example model and train it.

    Options:
        -h / --help    print the usage text and exit.
        -r / --resume  load weights from ``CHECKPOINT_NAME`` before training.
        -d / --data    path of the training CSV
                       (default ``../TrainingData/ideal_dchg.csv``).

    Args:
        argv: Command-line arguments without the program name.
    """
    usage = 'main.py \n -r to load latest checkpoint'
    resume = False
    data_path = '../TrainingData/ideal_dchg.csv'

    try:
        parsed_options, _ = getopt.getopt(argv, "hrd:", ["help", "resume", "data="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    for flag, value in parsed_options:
        if flag in ("-h", "--help"):
            print(usage)
            sys.exit()
        if flag in ("-r", "--resume"):
            resume = True
        elif flag in ("-d", "--data"):
            data_path = value

    # Get an example model
    model = network.get_model(layer_count=2, neurons_per_layer=16, use_bias=True)
    if resume:
        model.load_weights(CHECKPOINT_NAME)

    # Train the model
    trainModel(training_data_path=data_path,
               omit_saturated=True,
               epochs=100,
               batch_size=64,
               save_best=False,
               early_stopping=False,
               tensorboard=False,
               patience=8,
               model=model)
def train():
    """Train the edge model from a queue fed by background loader threads.

    Builds the graph (placeholders -> FIFO queue -> model -> loss -> Adam),
    restores the latest checkpoint from ``./train_results/trained_models``
    when one can be loaded, starts four ``load_and_enqueue`` threads and runs
    ``TRAINING_EPOCHES`` epochs, saving a checkpoint after each one.

    Side effects:
        Writes ``log.txt`` and ``cmd.txt`` under ``LOG_STORAGE_PATH``,
        summaries under ``SUMMARIES_FOLDER`` and checkpoints under
        ``MODEL_STORAGE_PATH``.
    """
    # NOTE(review): nesting was reconstructed from a whitespace-collapsed
    # source; confirm which statements belong to the tf.device scope.
    with tf.Graph().as_default():
        with tf.device('/gpu:' + str(FLAGS.gpu)):
            images_ph, edges_ph = placeholder_inputs()
            is_training_ph = tf.placeholder(tf.bool, shape=())

            queue = tf.FIFOQueue(
                capacity=10 * batch_size,
                dtypes=[tf.float32, tf.float32],
                shapes=[[NUM_VIEWS, IMAGE_SIZE, IMAGE_SIZE, 1],
                        [NUM_VIEWS, IMAGE_SIZE, IMAGE_SIZE, 1]])
            enqueue_op = queue.enqueue([images_ph, edges_ph])
            dequeue_images_ph, dequeue_edges_ph = queue.dequeue_many(batch_size)

            # model and loss
            edge_logits = model.get_model(dequeue_images_ph,
                                          is_training=is_training_ph)
            loss = model.get_loss(edge_logits, dequeue_edges_ph)

            # optimization
            total_var = tf.trainable_variables()
            train_step = tf.train.AdamOptimizer(
                learning_rate=LEARNING_RATE).minimize(loss, var_list=total_var)

        # write logs to the disk
        flog = open(os.path.join(LOG_STORAGE_PATH, 'log.txt'), 'w')
        try:
            saver = tf.train.Saver()

            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            config.allow_soft_placement = True
            sess = tf.Session(config=config)

            ckpt_dir = './train_results/trained_models'
            if not load_checkpoint(ckpt_dir, sess):
                sess.run(tf.global_variables_initializer())

            train_writer = tf.summary.FileWriter(SUMMARIES_FOLDER + '/train',
                                                 sess.graph)
            test_writer = tf.summary.FileWriter(SUMMARIES_FOLDER + '/test')

            with open(os.path.join(LOG_STORAGE_PATH, 'cmd.txt'), 'w') as fcmd:
                fcmd.write(str(FLAGS))

            def train_one_epoch(epoch_num):
                """Run one epoch and log the running and mean loss."""
                is_training = True
                num_data = len(TRAINING_FILE_LIST)
                num_batch = num_data // batch_size
                loss_acc = 0.0
                # Report roughly four times per epoch.
                display_mark = max([num_batch // 4, 1])

                for i in range(num_batch):
                    _, loss_val = sess.run(
                        [train_step, loss],
                        feed_dict={is_training_ph: is_training})
                    loss_acc += loss_val
                    if (i + 1) % display_mark == 0:
                        printout(
                            flog, 'Epoch %3d/%3d - Iter %4d/%d' %
                            (epoch_num + 1, TRAINING_EPOCHES, i + 1, num_batch))
                        printout(flog, 'total loss: %f' % (loss_acc / (i + 1)))

                loss_acc = loss_acc * 1.0 / num_batch
                printout(flog, '\tMean total Loss: %f' % loss_acc)

            if not os.path.exists(MODEL_STORAGE_PATH):
                os.mkdir(MODEL_STORAGE_PATH)

            coord = tf.train.Coordinator()
            for num_thread in range(4):
                t = StoppableThread(target=load_and_enqueue,
                                    args=(sess, enqueue_op, images_ph, edges_ph))
                t.setDaemon(True)
                t.start()
                coord.register_thread(t)

            for epoch in range(TRAINING_EPOCHES):
                printout(
                    flog, '\n>>> Training for the epoch %d/%d ...' %
                    (epoch + 1, TRAINING_EPOCHES))
                train_one_epoch(epoch)
                # Checkpoint interval; 1 means "after every epoch".
                if (epoch + 1) % 1 == 0:
                    cp_filename = saver.save(
                        sess,
                        os.path.join(MODEL_STORAGE_PATH,
                                     'epoch_' + str(epoch + 1) + '.ckpt'))
                    printout(
                        flog,
                        'Successfully store the checkpoint model into ' +
                        cp_filename)
                flog.flush()
        finally:
            # Close the log even when training aborts with an exception.
            flog.close()
def predict():
    """Segment every test point cloud and log per-class IoU and accuracy.

    Each sample of ``test_data`` is voxelised with ``model.pc2voxel``, run
    through the network and mapped back to per-point labels. With ``purify``
    enabled, a point takes the majority label of its ``knn`` nearest
    neighbours when its own label is 0 or the neighbours all agree.
    Aggregated counts, per-class IoU, accuracy and mean IoU are written to
    ``<output_dir>/log_test.txt``.

    Exits the process when no checkpoint can be loaded from ``ckpt_dir``.
    """
    with tf.device('/gpu:' + str(gpu_to_use)):
        pointgrid_ph, seg_label_ph = placeholder_inputs()
        is_training_ph = tf.placeholder(tf.bool, shape=())
        pred_seg = model.get_model(pointgrid_ph, is_training=is_training_ph)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True

    with tf.Session(config=config) as sess:
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)
        # The context manager closes the log on every exit path.
        with open(os.path.join(output_dir, 'log_test.txt'), 'w') as flog:
            # Restore variables from disk.
            if not load_checkpoint(ckpt_dir, sess):
                exit()

            is_training = False
            gt_classes = [0 for _ in range(model.NUM_CATEGORY)]
            positive_classes = [0 for _ in range(model.NUM_CATEGORY)]
            true_positive_classes = [0 for _ in range(model.NUM_CATEGORY)]

            for sample in range(test_data.shape[0]):
                pc = np.squeeze(test_data[sample, :, :])
                labels = np.squeeze(test_label[sample, :]).astype(int)
                seg_label = model.integer_label_to_one_hot_label(labels)
                pointgrid, pointgrid_label, index = model.pc2voxel(pc, seg_label)
                pointgrid = np.expand_dims(pointgrid, axis=0)
                pointgrid_label = np.expand_dims(pointgrid_label, axis=0)
                feed_dict = {
                    pointgrid_ph: pointgrid,
                    seg_label_ph: pointgrid_label,
                    is_training_ph: is_training,
                }
                pred_seg_val = sess.run(pred_seg, feed_dict=feed_dict)
                pred_seg_val = pred_seg_val[0, :, :, :, :, :]
                pred_point_label = model.populateOneHotSegLabel(
                    pc, pred_seg_val, index)

                if purify == True:
                    # NOTE(review): pre_label is the same object as
                    # pred_point_label, so corrections made for earlier points
                    # are visible to later ones -- confirm this is intended.
                    pre_label = pred_point_label
                    for point in range(pc.shape[0]):
                        idx = np.argsort(np.sum((pc[point, :] - pc)**2, axis=1))
                        j, L = 0, []
                        for _ in range(knn):
                            # Skip the point itself among its neighbours.
                            if idx[j] == point:
                                j += 1
                            L.append(pre_label[idx[j]])
                            j += 1
                        majority = max(set(L), key=L.count)
                        if pre_label[point] == 0 or len(set(L)) == 1:
                            pred_point_label[point] = majority

                for j in range(pred_point_label.shape[0]):
                    gt_l = int(labels[j])
                    pred_l = int(pred_point_label[j])
                    gt_classes[gt_l] += 1
                    positive_classes[pred_l] += 1
                    true_positive_classes[gt_l] += int(gt_l == pred_l)

            # NOTE(review): the summary is assumed to be written once, after
            # all samples; the collapsed source does not show its nesting.
            printout(flog, 'gt_l count:{}'.format(gt_classes))
            printout(flog, 'positive_classes count:{}'.format(positive_classes))
            printout(
                flog,
                'true_positive_classes count:{}'.format(true_positive_classes))

            iou_list = []
            for part in range(model.SEG_PART):
                union = float(gt_classes[part] + positive_classes[part] -
                              true_positive_classes[part])
                if union == 0:
                    # Class absent from both ground truth and predictions.
                    iou = 0
                    print('here no room')
                else:
                    iou = true_positive_classes[part] / union
                iou_list.append(iou)

            printout(flog, 'IOU:{}'.format(iou_list))
            printout(
                flog, 'ACC:{}'.format(
                    sum(true_positive_classes) * 1.0 / (sum(positive_classes))))
            printout(flog,
                     'mIOU:{}'.format(sum(iou_list) / float(model.SEG_PART)))
def main(network_name):
    """Train and evaluate ``network_name``, logging per-class accuracy.

    Parses the module-level ``parser``, prepares the checkpoint directory
    ``<save_path>_<network>`` (emptied when it already exists), then for each
    epoch trains on the training set, optionally saves a checkpoint,
    evaluates on the test set and prints overall and per-class accuracies
    (appended to ``log_<network>.txt`` when requested).

    Args:
        network_name: Stored in ``args.network`` and used in output names.
    """
    import shutil

    num_gpu = torch.cuda.device_count()
    args = parser.parse_args()
    args.network = network_name
    print(args)

    save_path = args.save_path + '_' + args.network
    if args.need_save_checkpoint:
        if os.path.isdir(save_path):
            # Replaces `rm -rf save_path/*` without handing the path to a
            # shell; like that glob, dot-entries are left in place.
            for entry in os.listdir(save_path):
                if entry.startswith('.'):
                    continue
                target = os.path.join(save_path, entry)
                if os.path.isdir(target) and not os.path.islink(target):
                    shutil.rmtree(target)
                else:
                    os.remove(target)
        elif not os.path.exists(save_path):
            os.mkdir(save_path)

    def per_class_report(correct, total):
        """Format one ``accID``/``acc`` entry per class (0 when unseen)."""
        parts = []
        for acc_idx in range(len(correct)):
            if total[acc_idx]:
                acc = correct[acc_idx] / total[acc_idx]
            else:
                acc = 0
            parts.append('\taccID:%d\tacc:%f\t' % (acc_idx + 1, acc))
        return ''.join(parts)

    log_file = None
    if args.need_save_accuracy_log:
        log_file = open('log_%s.txt' % (args.network), 'w')
    try:
        train_dataset = data_loader(args, train=True)
        test_dataset = data_loader(args, train=False)
        train_loader = data.DataLoader(dataset=train_dataset,
                                       batch_size=args.batch_size,
                                       shuffle=True,
                                       num_workers=args.num_workers)
        test_loader = data.DataLoader(dataset=test_dataset,
                                      batch_size=args.batch_size,
                                      shuffle=False,
                                      num_workers=args.num_workers)

        model = get_model(args).cuda()
        if args.need_multi_gpus:
            model = DataParallel(model, device_ids=range(num_gpu))
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        loss_func = nn.CrossEntropyLoss()
        loss_func = loss_func.cuda()

        train_total_step = len(train_loader)
        test_total_step = len(test_loader)

        for epoch in range(args.epoch):
            loss_sum = 0
            model.train()
            train_class_correct = list(0. for i in range(args.class_num))
            train_class_total = list(0. for i in range(args.class_num))
            for i, (images, label) in enumerate(train_loader):
                images = Variable(images.cuda())
                label = Variable(label.cuda())
                output = model(images)
                prediction = torch.argmax(output, 1)
                res = prediction == label
                for label_idx in range(len(label)):
                    label_single = label[label_idx]
                    train_class_correct[label_single] += res[label_idx].item()
                    train_class_total[label_single] += 1

                loss = loss_func(output, label)
                # Detach before accumulating so the running sum does not keep
                # every batch's autograd graph alive.
                loss_sum += loss.detach()
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                if i % args.display_train_interval == 0:
                    sys.stdout.write(
                        '[TRAIN net] Epoch [%d/%d], Step[%d/%d], loss:%.6f\r' %
                        (epoch + 1, args.epoch, i, train_total_step,
                         loss.cpu().data.numpy()))
            print()

            loss_mean = loss_sum / train_total_step
            if args.need_save_checkpoint:
                # DataParallel wraps the network; save the bare module.
                state = (model.module.state_dict()
                         if args.need_multi_gpus else model.state_dict())
                torch.save(
                    state,
                    "{}/net{}_{}.pth".format(save_path, epoch,
                                             loss_mean.cpu().data.numpy()))
            print('[LOSS] %f' % (loss_mean.cpu().data.numpy()))
            print()

            model.eval()
            test_class_correct = list(0. for i in range(args.class_num))
            test_class_total = list(0. for i in range(args.class_num))
            # No gradients are needed for evaluation.
            with torch.no_grad():
                for i, (images, label) in enumerate(test_loader):
                    images = Variable(images.cuda())
                    label = Variable(label.cuda())
                    output = model(images)
                    prediction = torch.argmax(output, 1)
                    res = prediction == label
                    for label_idx in range(len(label)):
                        label_single = label[label_idx]
                        test_class_correct[label_single] += res[label_idx].item()
                        test_class_total[label_single] += 1
                    if i % args.display_test_interval == 0:
                        sys.stdout.write('[TEST] Step[%d/%d]\r' %
                                         (i, test_total_step))
            print()

            acc_str = '[NET %s][EPOCH %3d/%3d] Train Accuracy: %f\tTest Accuracy: %f' % (
                args.network, epoch + 1, args.epoch,
                sum(train_class_correct) / sum(train_class_total),
                sum(test_class_correct) / sum(test_class_total))
            acc_str += '\t[TRAIN]\t'
            acc_str += per_class_report(train_class_correct, train_class_total)
            acc_str += '\t[TEST]\t'
            acc_str += per_class_report(test_class_correct, test_class_total)
            print(acc_str)

            if log_file is not None:
                log_file.write(acc_str + '\n')
                log_file.flush()
    finally:
        # Close the accuracy log even when training is interrupted.
        if log_file is not None:
            log_file.close()
def train():
    """Train the classification + segmentation model from a feeder queue.

    Builds the graph (placeholders -> FIFO queue -> model -> losses -> Adam),
    restores the latest checkpoint from ``./train_results/trained_models``
    when one can be loaded, starts sixteen ``load_and_enqueue`` threads and
    runs ``TRAINING_EPOCHES`` epochs, saving a checkpoint after each one.

    Side effects:
        Writes ``log.txt`` and ``cmd.txt`` under ``LOG_STORAGE_PATH``,
        summaries under ``SUMMARIES_FOLDER`` and checkpoints under
        ``MODEL_STORAGE_PATH``.
    """
    # NOTE(review): nesting was reconstructed from a whitespace-collapsed
    # source; confirm which statements belong to the tf.device scope.
    with tf.Graph().as_default():
        with tf.device('/gpu:' + str(FLAGS.gpu)):
            pointgrid_ph, cat_label_ph, seg_label_ph = placeholder_inputs()
            is_training_ph = tf.placeholder(tf.bool, shape=())

            queue = tf.FIFOQueue(
                capacity=20 * batch_size,
                dtypes=[tf.float32, tf.float32, tf.float32],
                shapes=[[model.N, model.N, model.N, model.NUM_FEATURES],
                        [model.NUM_CATEGORY],
                        [model.N, model.N, model.N, model.K + 1,
                         model.NUM_SEG_PART]])
            enqueue_op = queue.enqueue(
                [pointgrid_ph, cat_label_ph, seg_label_ph])
            dequeue_pointgrid, dequeue_cat_label, dequeue_seg_label = \
                queue.dequeue_many(batch_size)

            # model
            pred_cat, pred_seg = model.get_model(dequeue_pointgrid,
                                                 is_training=is_training_ph)

            # loss
            total_loss, cat_loss, seg_loss = model.get_loss(
                pred_cat, dequeue_cat_label, pred_seg, dequeue_seg_label)

            # optimization
            total_var = tf.trainable_variables()
            step = tf.train.AdamOptimizer(
                learning_rate=LEARNING_RATE).minimize(total_loss,
                                                      var_list=total_var)

        # write logs to the disk
        flog = open(os.path.join(LOG_STORAGE_PATH, 'log.txt'), 'w')
        try:
            saver = tf.train.Saver()

            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            config.allow_soft_placement = True
            sess = tf.Session(config=config)

            ckpt_dir = './train_results/trained_models'
            if not load_checkpoint(ckpt_dir, sess):
                sess.run(tf.global_variables_initializer())

            train_writer = tf.summary.FileWriter(SUMMARIES_FOLDER + '/train',
                                                 sess.graph)
            test_writer = tf.summary.FileWriter(SUMMARIES_FOLDER + '/test')

            with open(os.path.join(LOG_STORAGE_PATH, 'cmd.txt'), 'w') as fcmd:
                fcmd.write(str(FLAGS))

            def train_one_epoch(epoch_num):
                """Run one epoch and log running and mean losses."""
                is_training = True
                num_data = len(TRAINING_FILE_LIST)
                num_batch = num_data // batch_size
                total_loss_acc = 0.0
                cat_loss_acc = 0.0
                seg_loss_acc = 0.0
                # Report roughly four times per epoch.
                display_mark = max([num_batch // 4, 1])

                for i in range(num_batch):
                    _, total_loss_val, cat_loss_val, seg_loss_val = sess.run(
                        [step, total_loss, cat_loss, seg_loss],
                        feed_dict={is_training_ph: is_training})
                    total_loss_acc += total_loss_val
                    cat_loss_acc += cat_loss_val
                    seg_loss_acc += seg_loss_val

                    if (i + 1) % display_mark == 0:
                        printout(
                            flog, 'Epoch %d/%d - Iter %d/%d' %
                            (epoch_num + 1, TRAINING_EPOCHES, i + 1, num_batch))
                        printout(flog,
                                 'Total Loss: %f' % (total_loss_acc / (i + 1)))
                        printout(
                            flog,
                            'Classification Loss: %f' % (cat_loss_acc / (i + 1)))
                        printout(
                            flog,
                            'Segmentation Loss: %f' % (seg_loss_acc / (i + 1)))

                printout(flog,
                         '\tMean Total Loss: %f' % (total_loss_acc / num_batch))
                printout(
                    flog,
                    '\tMean Classification Loss: %f' % (cat_loss_acc / num_batch))
                printout(
                    flog,
                    '\tMean Segmentation Loss: %f' % (seg_loss_acc / num_batch))

            if not os.path.exists(MODEL_STORAGE_PATH):
                os.mkdir(MODEL_STORAGE_PATH)

            coord = tf.train.Coordinator()
            for num_thread in range(16):
                t = StoppableThread(target=load_and_enqueue,
                                    args=(sess, enqueue_op, pointgrid_ph,
                                          cat_label_ph, seg_label_ph))
                t.setDaemon(True)
                t.start()
                coord.register_thread(t)

            for epoch in range(TRAINING_EPOCHES):
                printout(
                    flog, '\n>>> Training for the epoch %d/%d ...' %
                    (epoch + 1, TRAINING_EPOCHES))
                train_one_epoch(epoch)
                # Checkpoint interval; 1 means "after every epoch".
                if (epoch + 1) % 1 == 0:
                    cp_filename = saver.save(
                        sess,
                        os.path.join(MODEL_STORAGE_PATH,
                                     'epoch_' + str(epoch + 1) + '.ckpt'))
                    printout(
                        flog,
                        'Successfully store the checkpoint model into ' +
                        cp_filename)
                flog.flush()
        finally:
            # Close the log even when training aborts with an exception.
            flog.close()
def predict():
    """Classify and segment every ShapeNet test shape and report metrics.

    For each file in ``TESTING_FILE_LIST`` the ``.mat`` content is
    voxelised, run through the network, and the predicted category and
    per-point labels are compared with the ground truth. With ``purify``
    enabled, a point takes the majority label of its ``knn`` nearest
    neighbours when its own label is 0 or the neighbours all agree.
    Overall and per-category accuracy / IoU go to ``<output_dir>/log.txt``;
    timings, the confusion matrix, the classification report and the list of
    misclassified files are printed.

    Exits the process when no checkpoint can be loaded.
    """
    is_training = False
    with tf.device('/gpu:' + str(gpu_to_use)):
        the_model_ph, cat_label_ph, seg_label_ph = placeholder_inputs()
        is_training_ph = tf.placeholder(tf.bool, shape=())
        # model
        pred_cat, pred_seg = model.get_model(the_model_ph,
                                             is_training=is_training_ph)

    # NOTE(review): `saver` is not used below (load_checkpoint receives only
    # the session); kept because constructing it adds ops to the graph.
    saver = tf.train.Saver()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True

    with tf.Session(config=config) as sess:
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)
        # The context manager closes the log on every exit path.
        with open(os.path.join(output_dir, 'log.txt'), 'w') as flog:
            # Restore variables from disk.
            ckpt_dir = './train_results/trained_models'
            if not load_checkpoint(ckpt_dir, sess):
                exit()

            if not os.path.exists('../data/ShapeNet/test-object'):
                os.mkdir('../data/ShapeNet/test-object')

            y_true = []
            y_pred = []
            start_all = datetime.now()
            # Exact zero duration (two separate now() calls gave a slightly
            # negative start value).
            total_time = start_all - start_all
            incorrect = []
            preficted = []

            avg_cat_accuracy = 0.0
            cat_accuracy = np.zeros((model.NUM_CATEGORY), dtype=np.float32)
            cat_obj = np.zeros((model.NUM_CATEGORY), dtype=np.int32)
            avg_iou = 0.0
            cat_iou = np.zeros((model.NUM_CATEGORY), dtype=np.float32)

            num_files = len(TESTING_FILE_LIST)
            for loop, file_name in enumerate(TESTING_FILE_LIST):
                start_one = datetime.now()
                mat_content = scipy.io.loadmat('../data/ShapeNet/test/' +
                                               file_name + '.mat')
                pc = mat_content['points']
                labels = np.squeeze(mat_content['labels'])
                category = mat_content['category'][0][0]
                cat_label = model.integer_label_to_one_hot_label(category)
                category = int(category)
                cat_obj[category] += 1
                seg_label = model.integer_label_to_one_hot_label(labels)
                the_model, the_model_label, index = model.pc2voxel(pc, seg_label)
                the_model = np.expand_dims(the_model, axis=0)
                cat_label = np.expand_dims(cat_label, axis=0)
                the_model_label = np.expand_dims(the_model_label, axis=0)
                feed_dict = {
                    the_model_ph: the_model,
                    cat_label_ph: cat_label,
                    seg_label_ph: the_model_label,
                    is_training_ph: is_training,
                }
                pred_cat_val, pred_seg_val = sess.run([pred_cat, pred_seg],
                                                      feed_dict=feed_dict)
                pred_cat_val = np.argmax(pred_cat_val[0, :], axis=0)
                pred_seg_val = pred_seg_val[0, :, :, :, :, :]
                avg_cat_accuracy += (pred_cat_val == category)
                cat_accuracy[category] += (pred_cat_val == category)

                stop_one = datetime.now()
                print('Time: ', stop_one - start_one)
                total_time += (stop_one - start_one)
                print('total Time: ', total_time)

                y_true.append(category)
                y_pred.append(pred_cat_val)
                if pred_cat_val != category:
                    incorrect.append(file_name)
                    preficted.append(category2name[pred_cat_val])
                    # NOTE(review): assumed to print only for misclassified
                    # shapes; the collapsed source is ambiguous here.
                    print(category2name[pred_cat_val])

                pred_point_label = model.populateOneHotSegLabel(
                    pc, pred_seg_val, index)

                if purify == True:
                    # NOTE(review): pre_label is the same object as
                    # pred_point_label, so earlier corrections are visible to
                    # later points -- confirm this is intended.
                    pre_label = pred_point_label
                    for i in range(pc.shape[0]):
                        idx = np.argsort(np.sum((pc[i, :] - pc)**2, axis=1))
                        j, L = 0, []
                        for _ in range(knn):
                            # Skip the point itself among its neighbours.
                            if idx[j] == i:
                                j += 1
                            L.append(pre_label[idx[j]])
                            j += 1
                        majority = max(set(L), key=L.count)
                        if pre_label[i] == 0 or len(set(L)) == 1:
                            pred_point_label[i] = majority

                iou = model.intersection_over_union(pred_point_label, labels)
                avg_iou += iou
                cat_iou[category] += iou

                printout(flog, '%d/%d %s' % ((loop + 1), num_files, file_name))
                printout(flog, '----------')

            avg_cat_accuracy /= float(np.sum(cat_obj))
            avg_iou /= float(np.sum(cat_obj))
            printout(flog,
                     'Average classification accuracy: %f' % avg_cat_accuracy)
            printout(flog, 'Average IoU: %f' % avg_iou)
            printout(
                flog,
                'CategoryName, CategorySize, ClassificationAccuracy, SegmentationIoU'
            )
            for i in range(model.NUM_CATEGORY):
                cat_accuracy[i] /= float(cat_obj[i])
                cat_iou[i] /= float(cat_obj[i])
                printout(
                    flog, '\t%s (%d): %f, %f' %
                    (category2name[i], cat_obj[i], cat_accuracy[i], cat_iou[i]))

            # Print the confusion matrix
            print(metrics.confusion_matrix(y_true, y_pred))
            # Print the precision and recall, among other metrics
            print(metrics.classification_report(y_true, y_pred, digits=3))

            stop_all = datetime.now()
            print('Time: ', stop_all - start_all)
            print(incorrect)
            print(preficted)
import pickle
import random

# MongoDB database handle and a GridFS file store on top of it.
db = pymongo.MongoClient().maxilafacial
fileDB = gridfs.GridFS(db)

#Graph Definition
num_point = 4096
batch_size = 4
# Input placeholder: (batch, num_point, 3); batch size is left open.
input_tensor = tf.placeholder(tf.float32, shape=(None, num_point, 3), name="target_input")
# Ground truth: one integer class id per point.
gt_tensor = tf.placeholder(tf.int32, shape=(None, num_point))
is_training = tf.placeholder(tf.bool, name="target_isTraining")
output_tensor = network.get_model(input_tensor, is_training)
# Predicted class per point: arg-max over axis 2 of the network output.
output_data_tensor = tf.argmax(output_tensor, 2, name="target_output")
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=gt_tensor, logits=output_tensor)
loss_op = tf.reduce_mean(loss)
#Add Regularization
# L2 penalty over all trainable variables whose name does not contain 'bias'.
# It is built but not added to loss_op (see the disabled line below).
regularizer = tf.add_n([
    tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'bias' not in v.name
])
# loss_op = loss_op + 0.001 * regularizer
# Adam with learning rate 1e-4 and beta1 = 0.5.
optimizer = tf.train.AdamOptimizer(1e-4, 0.5)
train_op = optimizer.minimize(loss_op)


def shuffle_point_sequence(input_batch):
def run_eval(args):
    """Evaluate a saved transformer on every video of the HR/LR/DLR loader.

    The checkpoint at ``args.ckpt`` supplies both the network options
    (``ckpt['opts']``) and the weights (``ckpt['transformer']``).  For each
    video, channel 0 of every DLR frame is fed to the network; the remaining
    channels are passed through unchanged (bicubically upsampled for the HR
    output).  PSNR and SSIM of channel 0 are accumulated per video, and the
    reconstructed HR and LR frames are written to ``<args.out>/<name>_HR.yuv``
    and ``<args.out>/<name>_LR.yuv``.

    Args:
        args: parsed options; ``ckpt``, ``size`` and ``out`` are read here and
            the whole object is forwarded to the loader and the meters.
    """

    def frame_to_tensor(frame):
        # Channel axis is moved first, a batch axis is added, and values are
        # scaled by 1/255 (presumably 8-bit H x W x C frames - confirm
        # against the loader).
        chw = torch.from_numpy(frame.transpose((2, 0, 1))).float()
        return torch.autograd.Variable(chw).unsqueeze(0).cuda() / 255.0

    def first_channel(tensor):
        # Keep the channel axis so the result stays 4-D.
        return tensor[:, 0, :, :].unsqueeze(1)

    def psnr(mse):
        # A perfect reconstruction has mse == 0; report +inf instead of
        # raising ZeroDivisionError.
        if mse == 0:
            return float('inf')
        return 10 * log10(1 / mse)

    ckpt = torch.load(args.ckpt)
    ckpt_opt = ckpt['opts']
    transformer = get_model(ckpt_opt.net)(ckpt_opt).cuda()
    transformer.load_state_dict(ckpt['transformer'])
    transformer.eval()

    up_sampler = nn.Upsample(scale_factor=2,
                             mode='bicubic',
                             align_corners=True).cuda()
    video_loader = db.HRLRDLR_VideoLoader(args,
                                          video_size=(args.size[0],
                                                      args.size[1]))
    print('Number of samples: {}'.format(len(video_loader)))
    criterionMSE = nn.MSELoss().cuda()

    with torch.no_grad():
        for i in range(len(video_loader)):
            data_sample, _vname = video_loader.get_item(i)
            print('Processing ' + _vname)
            DLR_frames = data_sample['DLR']
            LR_frames = data_sample['LR']
            HR_frames = data_sample['HR']

            hr_writer = YUVWriter(os.path.join(args.out, _vname + '_HR.yuv'),
                                  (args.size[0] * 2, args.size[1] * 2))
            lr_writer = YUVWriter(os.path.join(args.out, _vname + '_LR.yuv'),
                                  (args.size[0], args.size[1]))
            errors = AverageMeters(
                args, _vname,
                ['HR-Y-PSNR', 'HR-Y-SSIM', 'LR-Y-PSNR', 'LR-Y-SSIM'])

            # Close both writers even when a frame fails, so partially
            # written files are not left open.
            try:
                for frame_id in range(len(DLR_frames)):
                    # Init data
                    DLR = frame_to_tensor(DLR_frames[frame_id])
                    LR = frame_to_tensor(LR_frames[frame_id])
                    HR = frame_to_tensor(HR_frames[frame_id])

                    # Inference
                    res_residual_out, rec_residual_out, upsampled_lr = \
                        transformer(first_channel(DLR))
                    HR_out = upsampled_lr + rec_residual_out
                    HR_out = torch.cat(
                        [HR_out, up_sampler(DLR[:, 1:, :, :])], 1)
                    LR_out = first_channel(DLR) + res_residual_out
                    LR_out = torch.cat([LR_out, DLR[:, 1:, :, :]], 1)

                    # Evaluation (channel 0 only)
                    hr_y_mse = criterionMSE(first_channel(HR_out),
                                            first_channel(HR))
                    hr_y_psnr = psnr(hr_y_mse.item())
                    hr_y_ssim = pytorch_ssim.ssim(first_channel(HR_out),
                                                  first_channel(HR)).item()
                    lr_y_mse = criterionMSE(first_channel(LR_out),
                                            first_channel(LR))
                    lr_y_psnr = psnr(lr_y_mse.item())
                    lr_y_ssim = pytorch_ssim.ssim(first_channel(LR_out),
                                                  first_channel(LR)).item()
                    errors.update(
                        [hr_y_psnr, hr_y_ssim, lr_y_psnr, lr_y_ssim])

                    # Write frames; both outputs are handed over as CPU
                    # tensors (the original moved only HR_out).
                    hr_writer.write(HR_out.cpu())
                    lr_writer.write(LR_out.cpu())
            finally:
                hr_writer.close()
                lr_writer.close()

            errors.write_result()
    print('Test Completed')
def main():
    """Quantize a pretrained CIFAR model and fine-tune it with a generator.

    Parses the command line, seeds every RNG, loads the checkpoint given by
    ``--ckpt`` into the model, keeps a full-precision deep copy (``p_model``),
    quantizes the model (``q_model``) and then alternates ``train`` / ``test``
    for ``--epochs`` epochs.  The best quantized model and the generator are
    saved under ``checkpoint/q_model/`` and the per-epoch accuracies are
    appended to ``log/ZAQ-<dataset>-<model>-<weight_bit>bit.csv``.

    With ``--test_only`` the quantized model is evaluated and the function
    returns without training.
    """
    # Training settings
    parser = argparse.ArgumentParser(description='ZAQ CIFAR.')
    parser.add_argument('--num_classes', type=int, default=10)
    parser.add_argument('--batch_size', type=int, default=256, metavar='N',
                        help='input batch size for training (default: 256)')
    parser.add_argument('--test_batch_size', type=int, default=256,
                        metavar='N',
                        help='input batch size for testing (default: 256)')
    parser.add_argument('--epochs', type=int, default=300, metavar='N',
                        help='number of epochs to train (default: 300)')
    parser.add_argument('--epoch_itrs', type=int, default=60)
    parser.add_argument('--lr_Q', type=float, default=0.1, metavar='LR',
                        help='learning rate (default: 0.1)')
    parser.add_argument('--lr_G', type=float, default=1e-3,
                        help='learning rate (default: 0.001)')
    parser.add_argument('--data_root', type=str, required=True, default=None)
    parser.add_argument('--dataset', type=str, default='cifar10',
                        choices=['cifar10', 'cifar100'],
                        help='dataset name (default: cifar10)')
    parser.add_argument('--model', type=str, default='resnet18',
                        choices=['mobilenetv2', 'vgg19', 'resnet18',
                                 'resnet20', 'resnet50'],
                        help='model name (default: resnet18)')
    parser.add_argument('--weight_decay', type=float, default=5e-4)
    parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
                        help='SGD momentum (default: 0.9)')
    parser.add_argument('--device', type=str, default='0',
                        help='device for training')
    parser.add_argument('--seed', type=int, default=6786, metavar='S',
                        help='random seed (default: 6786)')
    parser.add_argument('--ckpt', type=str, default='')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging '
                             'training status')
    parser.add_argument('--nz', type=int, default=256)
    parser.add_argument("--alpha", type=float, default=1)
    parser.add_argument("--beta", type=float, default=0.1)
    parser.add_argument("--gamma", type=float, default=0.1)
    parser.add_argument('--test_only', action='store_true', default=False)
    parser.add_argument('--download', action='store_true', default=False)
    # quantization
    parser.add_argument('--weight_bit', type=int, default=6,
                        help='bit-width for parameters')
    parser.add_argument('--activation_bit', type=int, default=8,
                        help='bit-width for act')
    args = parser.parse_args()

    # Reproducibility: seed every RNG and force deterministic cuDNN kernels.
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # args.device starts as the GPU id string and is then replaced by the
    # torch.device actually used.
    os.environ['CUDA_VISIBLE_DEVICES'] = args.device
    args.device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
    os.makedirs('checkpoint/q_model/', exist_ok=True)
    print(args)

    _, test_loader = get_dataloader(args)

    args.num_classes = 10 if args.dataset == 'cifar10' else 100
    q_model = network.get_model(args)
    generator = network.gan.Generator(nz=args.nz, nc=3, img_size=32)

    # Load onto the CPU first so a GPU-saved checkpoint also restores on a
    # CPU-only machine; the model is moved to args.device right after.
    q_model.load_state_dict(torch.load(args.ckpt, map_location='cpu'))
    print("p_model restored from %s" % (args.ckpt))

    q_model = q_model.to(args.device)
    generator = generator.to(args.device)

    # The full-precision copy must be taken before quantization.
    p_model = copy.deepcopy(q_model)

    # quantization
    q_model = quantize_model(q_model, args)
    quant_acc = test(args, q_model, test_loader, 0)
    print('Quat Acc=%0.4f \n' % quant_acc)

    p_model.eval()

    # --momentum was parsed but ignored before (0.9 was hard-coded); the
    # default is the same value, so default runs are unchanged.
    optimizer_Q = optim.SGD(q_model.parameters(), lr=args.lr_Q,
                            weight_decay=args.weight_decay,
                            momentum=args.momentum)
    optimizer_G = optim.Adam(generator.parameters(), lr=args.lr_G)

    scheduler_Q = optim.lr_scheduler.MultiStepLR(optimizer_Q, [100, 200],
                                                 args.gamma)
    scheduler_G = optim.lr_scheduler.MultiStepLR(optimizer_G, [100, 200],
                                                 args.gamma)
    best_acc = 0
    if args.test_only:
        test(args, q_model, test_loader, 0)
        return

    acc_list = []
    for epoch in range(1, args.epochs + 1):
        # Train
        train(args, p_model=p_model, q_model=q_model, generator=generator,
              optimizer=[optimizer_Q, optimizer_G], epoch=epoch)
        scheduler_Q.step()
        scheduler_G.step()

        # Test
        acc = test(args, q_model, test_loader, epoch)
        acc_list.append(acc)
        if acc > best_acc:
            best_acc = acc
            print('Saving a best checkpoint ...')
            torch.save(q_model.state_dict(),
                       "checkpoint/q_model/ZAQ-%s-%s-%sbit.pt"
                       % (args.dataset, args.model, args.weight_bit))
            torch.save(generator.state_dict(),
                       "checkpoint/q_model/ZAQ-%s-%s-%sbit-generator.pt"
                       % (args.dataset, args.model, args.weight_bit))
        vp.add_scalar('Acc', epoch, acc)
    print("Best Acc=%.6f" % best_acc)

    import csv
    os.makedirs('log', exist_ok=True)
    # The parser defines --weight_bit, not --param_bits; the old attribute
    # name raised AttributeError after the whole training run had finished.
    log_path = 'log/ZAQ-%s-%s-%sbit.csv' % (args.dataset, args.model,
                                            args.weight_bit)
    # newline='' is what the csv module expects for files it writes.
    with open(log_path, 'a', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(acc_list)
def main():
    """Pretrain the full-precision ("P") model and keep the best checkpoint.

    Reads options from the command line, seeds all RNGs, builds the data
    loaders and the model, and trains with SGD plus a step LR schedule.
    After every epoch the model is evaluated; whenever the accuracy beats the
    best seen so far, the weights are written to
    ``checkpoint/p_model/<dataset>-<model>.pt``.  With ``--test_only`` a
    single evaluation is run and the function returns.
    """
    # Training settings
    parser = argparse.ArgumentParser('Pretrain P model.')
    add = parser.add_argument
    add('--num_classes', type=int, default=10)
    add('--batch_size', type=int, default=256, metavar='N',
        help='input batch size for training (default: 256)')
    add('--test_batch_size', type=int, default=256, metavar='N',
        help='input batch size for testing (default: 1000)')
    add('--data_root', type=str, required=True, default=None)
    add('--epochs', type=int, default=200, metavar='N',
        help='number of epochs to train (default: 200)')
    add('--lr', type=float, default=0.1, metavar='LR',
        help='learning rate (default: 0.1)')
    add('--weight_decay', type=float, default=5e-4)
    add('--dataset', type=str, default='mnist',
        choices=['mnist', 'cifar10', 'cifar100', 'caltech101', 'nyuv2'],
        help='dataset name (default: mnist)')
    add('--model', type=str, default='resnet20',
        choices=['resnet18', 'resnet50', 'mobilenetv2', 'resnet20', 'vgg19'],
        help='model name (default: resnet20)')
    add('--step_size', type=int, default=80)
    add('--momentum', type=float, default=0.9, metavar='M',
        help='SGD momentum (default: 0.9)')
    add('--device', type=str, default='0', help='device for training')
    add('--seed', type=int, default=6786, metavar='S',
        help='random seed (default: 6786)')
    add('--ckpt', type=str, default=None)
    add('--log_interval', type=int, default=100, metavar='N',
        help='how many batches to wait before logging training status')
    for flag in ('--test_only', '--download', '--pretrained', '--verbose'):
        add(flag, action='store_true', default=False)
    args = parser.parse_args()

    # Seed every random source with the same value.
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed,
                    np.random.seed, random.seed):
        seed_fn(args.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # The GPU id string is exported first; args.device then becomes the
    # torch.device object used from here on.
    os.environ['CUDA_VISIBLE_DEVICES'] = args.device
    args.device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
    os.makedirs('checkpoint/p_model/', exist_ok=True)
    print(args)

    train_loader, test_loader = get_dataloader(args)
    net = network.get_model(args)
    # A checkpoint is only restored when --pretrained is also given.
    if args.pretrained and args.ckpt is not None:
        net.load_state_dict(torch.load(args.ckpt))
    net = net.to(args.device)

    sgd = optim.SGD(net.parameters(), lr=args.lr,
                    weight_decay=args.weight_decay, momentum=args.momentum)
    best_acc = 0
    scheduler = optim.lr_scheduler.StepLR(sgd, args.step_size, 0.1)

    if args.test_only:
        test(args, net, test_loader, 0)
        return

    best_path = "checkpoint/p_model/%s-%s.pt" % (args.dataset, args.model)
    for epoch in range(1, args.epochs + 1):
        train(args, net, train_loader, sgd, epoch)
        acc = test(args, net, test_loader, epoch)
        scheduler.step()
        if not acc > best_acc:
            continue
        best_acc = acc
        print('Saving a best checkpoint ...')
        torch.save(net.state_dict(), best_path)
    print("Best Acc=%.6f" % best_acc)
# Make sure the output directories exist before anything is written.
for _directory in (save_root, log_root):
    utils.try_make_dir(_directory)

######################
#     DataLoaders
######################
train_dataloader = dl.train_dataloader
val_dataloader = dl.val_dataloader

# init log
logger = init_log(training=True)

######################
#     Init model
######################
Model = get_model(opt.model)
model = Model(opt)

# Multi-GPU is not supported yet.
# if len(opt.gpu_ids):
#     model = torch.nn.DataParallel(model, device_ids=opt.gpu_ids)
model = model.to(device=opt.device)

# Training starts at epoch 1 unless a checkpoint is loaded AND resuming was
# requested, in which case it continues after the loaded epoch.
start_epoch = 1
if opt.load:
    load_epoch = model.load(opt.load)
    if opt.resume:
        start_epoch = load_epoch + 1

model.train()
from tensorflow import keras
import data
import network as nt
import show

# Demo script: load the keypoint data, build the network and display the
# predicted keypoints for the first ten test images.
if __name__ == '__main__':
    FILENAME_NT = 'model.h5'
    FILENAME_TRAIN_DATA = 'data/training.csv'
    FILENAME_TEST_DATA = 'data/test.csv'

    df_train = pd.read_csv(FILENAME_TRAIN_DATA)
    df_test = pd.read_csv(FILENAME_TEST_DATA)

    X_train, y_train = data.get_train_data(df_train)
    X_test = data.get_test_data(df_test)

    model = nt.get_model()

    for i in range(10):
        # Channel 0 of the i-th test sample.
        image = X_test[i, :, :, 0]
        show.show_predicted_image_with_keypoints(model, data.PIPELINE, image)

    # Batch variant of the preview, currently disabled:
    # images = np.array([X_train[i,:,:,0] for i in range(10)]).reshape(10, 96, 96, 1)
    # show.show_predicted_images_with_keypoints(model, data.PIPELINE, images)

    # -------------------------------------------------------------------
    # Training, currently disabled:
    # history = nt.fit_model(model, FILENAME_NT, X_train, y_train)
    # model.save(FILENAME_NT)
    # -------------------------------------------------------------------
help= 'fork 명령어를 입력할때의 체크포인트로 설정됩니다. 체크포인트 옵션을 안주면 마지막 wall time 의 model 을 가져옵니다.' ) args.add_argument('--pause', type=int, default=0, help='model 을 load 할때 1로 설정됩니다.') config = args.parse_args() # training parameters nb_epoch = config.epochs batch_size = config.batch_size num_classes = 1000 input_shape = (224, 224, 3) # input image shape """ Model """ model, base_model = get_model('triplet', 224, num_classes, opt.base_model) bind_model(base_model, config.batch_size) get_feature_layer = K.function([base_model.layers[0].input] + [K.learning_phase()], [base_model.layers[-1].output]) if config.pause: nsml.paused(scope=locals()) bTrainmode = False if config.mode == 'train': bTrainmode = True """ Load data """ print('dataset path', DATASET_PATH) output_path = ['./img_list.pkl', './label_list.pkl'] train_dataset_path = os.path.join(DATASET_PATH, 'train/train_data')
parser.add_argument('--notify', type=int, default=100)
args = parser.parse_args()

cuda = torch.cuda.is_available()
set_seed(seed=1, cuda=cuda)

# Data
_loaders = get_data_loaders(batch_size=args.batch_size)
train_loader, train_dataset, test_loader, test_dataset = _loaders

# Model
model, loss_fn, optimizer = get_model(num_classes=args.num_classes,
                                      learning_rate=args.learning_rate,
                                      cuda=cuda)
start_epoch, best_accuracy = load_model(model, cuda)

# Keyword arguments that are the same for every epoch.
_train_kwargs = dict(
    model=model,
    optimizer=optimizer,
    train_loader=train_loader,
    train_dataset=train_dataset,
    loss_fn=loss_fn,
    num_epochs=args.epochs,
    batch_size=args.batch_size,
    notify=args.notify,
    cuda=cuda,
)

# Train for one epoch, then evaluate on the test loader.
for epoch in range(start_epoch, args.epochs):
    train_model(epoch=epoch, **_train_kwargs)
    accuracy = eval_model(model=model, test_loader=test_loader, cuda=cuda)
loss3)) if __name__ == '__main__': system_arg = parse_args() args = Param() args.load_parm(system_arg.train_config) args = args.get_parm() gluoncv.utils.random.seed(args['seed']) ctx = [mx.gpu(int(i)) for i in args['gpus'].split(',') if i.strip()] print(ctx) ctx = ctx if ctx else [mx.cpu()] net_name = '_'.join(('east', str(args['data_shape']), args['network'])) args['save_prefix'] += net_name net = get_model(args['network'], text_scale=args['data_shape']) async_net = net if args['resume']: net.load_parameters(args['resume']) async_net.load_parameters(args['resume'].strip()) else: with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") net.initialize() async_net.initialize() #for item in net.features.collect_params().items(): # if item[0].split('_')[-1] not in ['gamma', 'beta', 'mean', 'var']: # item[1].cast('float16') #for item in net.features.collect_params().items(): # if 'mobilenet' in item[0].split('_')[1]:
def main():
    """Entry point: parse options, build or restore the model, run a trainer.

    When ``opt.restore`` is set, the model weights, optimizer, starting
    iteration and best result are taken from that checkpoint; otherwise a
    fresh model and an SGD optimizer are created.  The single-GPU trainer is
    used when exactly one CUDA device is visible, the multi-GPU trainer in
    every other case.
    """
    opt = Options()
    args = opt.parse_command()
    opt.print_items()

    # if setting gpu id, the using single GPU
    if opt.gpu:
        print('Single GPU Mode.')
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    # set random seed
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed,
                    np.random.seed, random.seed):
        seed_fn(opt.manual_seed)

    cudnn.benchmark = True

    if torch.cuda.device_count() > 1:
        print('Multi-GPUs Mode.')
        print("Let's use ", torch.cuda.device_count(), " GPUs!")
    else:
        print('Single GPU Mode.')
        print("Let's use GPU:", opt.gpu)

    if not opt.restore:
        print("=> creating Model")
        model = get_model(opt)
        print("=> model created.")
        start_iter = 1
        best_result = None

        # different modules have different learning rate
        train_params = get_train_params(opt, model)
        optimizer = torch.optim.SGD(train_params,
                                    lr=opt.lr,
                                    momentum=opt.momentum,
                                    weight_decay=opt.weight_decay)
    else:
        assert os.path.isfile(opt.restore), \
            "=> no checkpoint found at '{}'".format(opt.restore)
        print("=> loading checkpoint '{}'".format(opt.restore))
        state = torch.load(opt.restore)

        start_iter = state['epoch'] + 1
        best_result = state['best_result']
        # The optimizer is taken from the checkpoint as stored.
        optimizer = state['optimizer']

        model = get_model(opt)
        model.load_state_dict(state['state_dict'])
        print("=> loaded checkpoint (epoch {})".format(state['epoch']))

        del state  # clear memory
        torch.cuda.empty_cache()

    # Pick the trainer implementation lazily so only the needed one is
    # imported.
    if torch.cuda.device_count() == 1:
        from libs.trainers import single_gpu_trainer as trainer_module
    else:
        from libs.trainers import multi_gpu_trainer as trainer_module
    trainer = trainer_module.trainer(opt, model, optimizer, start_iter,
                                     best_result)
    trainer.train_eval()
import utils
import network
import numpy as np
import os

# Targets are divided element-wise by these two values before fitting
# (presumably a 1920x1080 screen resolution - confirm against utils.load_data).
_target_scale = np.array([1920, 1080])

(x_train, y_train), (x_test, y_test) = utils.load_data()

model = network.get_model()
model.fit(
    x_train,
    y_train / _target_scale,
    validation_data=(x_test, y_test / _target_scale),
    batch_size=64,
    epochs=200,
    verbose=1,
)
model.save('model.h5')
def train():
    """Build the segmentation graph, train it and checkpoint periodically.

    A fresh graph is created on the GPU selected by ``FLAGS.gpu``.  Variables
    are restored from ``ckpt_dir`` when ``load_checkpoint`` succeeds and
    initialized from scratch otherwise.  Training runs for
    ``TRAINING_EPOCHES`` epochs; progress is written to
    ``LOG_DIR/log_train.txt`` and a checkpoint is saved every fifth epoch.
    """
    with tf.Graph().as_default():
        with tf.device('/gpu:' + str(FLAGS.gpu)):
            pointgrid_ph, seg_label_ph = placeholder_inputs()
            is_training_ph = tf.placeholder(tf.bool, shape=())

            # model
            pred_seg = model.get_model(pointgrid_ph,
                                       is_training=is_training_ph)
            total_loss, seg_loss = model.get_loss(pred_seg, seg_label_ph)

            # optimization
            total_var = tf.trainable_variables()
            step = tf.train.AdamOptimizer(
                learning_rate=LEARNING_RATE).minimize(total_loss,
                                                      var_list=total_var)

        # write logs to the disk; the context manager closes the file even
        # when training aborts with an exception
        with open(os.path.join(LOG_DIR, 'log_train.txt'), 'w') as flog:
            saver = tf.train.Saver()

            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            config.allow_soft_placement = True
            sess = tf.Session(config=config)

            if not load_checkpoint(ckpt_dir, sess):
                sess.run(tf.global_variables_initializer())

            # Add summary writers
            train_writer = tf.summary.FileWriter(
                os.path.join(LOG_DIR, 'train'), sess.graph)
            test_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'test'))

            # Record the flags this run was started with.
            with open(os.path.join(LOG_DIR, 'cmd.txt'), 'w') as fcmd:
                fcmd.write(str(FLAGS))

            def train_one_epoch(epoch_num, sess, train_writer):
                """Run one optimization pass over the shuffled training set."""
                is_training = True

                current_data, current_label, _ = shuffle_data(
                    train_data, train_label)
                num_data = train_data.shape[0]
                # Samples that do not fill a whole batch are dropped.
                num_batch = num_data // BATCH_SIZE
                total_loss_acc = 0.0
                seg_loss_acc = 0.0
                for batch_idx in range(num_batch):
                    start_idx = batch_idx * BATCH_SIZE
                    end_idx = (batch_idx + 1) * BATCH_SIZE
                    pointgrid, pointgrid_label = transfor_data(
                        current_data[start_idx:end_idx, :, :],
                        current_label[start_idx:end_idx, :])
                    feed_dict = {
                        is_training_ph: is_training,
                        pointgrid_ph: pointgrid,
                        seg_label_ph: pointgrid_label
                    }
                    _, total_loss_val, seg_loss_val = sess.run(
                        [step, total_loss, seg_loss], feed_dict=feed_dict)
                    total_loss_acc += total_loss_val
                    seg_loss_acc += seg_loss_val

                    if batch_idx % 100 == 0:
                        print('Current batch/total batch num: %d/%d' %
                              (batch_idx, num_batch))
                        printout(
                            flog, 'Epoch %d/%d - Iter %d/%d' %
                            (epoch_num + 1, TRAINING_EPOCHES, batch_idx + 1,
                             num_batch))
                        printout(
                            flog, 'Total Loss: %f' %
                            (total_loss_acc / (batch_idx + 1)))
                        printout(
                            flog, 'Segmentation Loss: %f' %
                            (seg_loss_acc / (batch_idx + 1)))

                printout(
                    flog,
                    '\tMean Total Loss: %f' % (total_loss_acc / num_batch))
                printout(
                    flog, '\tMean Segmentation Loss: %f' %
                    (seg_loss_acc / num_batch))

            def test_one_epoch(sess, test_writer):
                """Evaluate loss and accuracy on the test set (no updates)."""
                is_training = False

                current_data, current_label, _ = shuffle_data(
                    test_data, test_label)
                num_data = test_data.shape[0]
                num_batch = num_data // BATCH_SIZE
                total_loss_acc = 0.0
                seg_loss_acc = 0.0
                total_correct = 0
                total_seen = 0
                total_seen_class = [0 for _ in range(model.SEG_PART)]
                total_correct_class = [0 for _ in range(model.SEG_PART)]
                for batch_idx in range(num_batch):
                    if batch_idx % 100 == 0:
                        print('Current batch/total batch num: %d/%d' %
                              (batch_idx, num_batch))
                    start_idx = batch_idx * BATCH_SIZE
                    end_idx = (batch_idx + 1) * BATCH_SIZE
                    pointgrid, pointgrid_label = transfor_data(
                        current_data[start_idx:end_idx, :, :],
                        current_label[start_idx:end_idx, :])
                    feed_dict = {
                        is_training_ph: is_training,
                        pointgrid_ph: pointgrid,
                        seg_label_ph: pointgrid_label
                    }
                    # Only losses and predictions are fetched here.  Fetching
                    # the optimizer op `step` as well (as before) would
                    # update the weights on test data during evaluation.
                    total_loss_val, seg_loss_val, pred_val = sess.run(
                        [total_loss, seg_loss, pred_seg],
                        feed_dict=feed_dict)
                    total_loss_acc += total_loss_val
                    seg_loss_acc += seg_loss_val

                    pred_val = np.argmax(pred_val, 2)
                    TP = np.sum(pred_val == current_label[start_idx:end_idx])
                    total_correct += TP
                    total_seen += (BATCH_SIZE * model.NUM_POINT)
                    for i in range(start_idx, end_idx):
                        for j in range(model.NUM_POINT):
                            label = current_label[i, j]
                            total_seen_class[label] += 1
                            total_correct_class[label] += (
                                pred_val[i - start_idx, j] == label)

                printout(
                    flog, 'eval accuracy: %f' %
                    (total_correct / float(total_seen)))
                # The `np.float` alias was removed from NumPy; the builtin
                # float selects the same float64 dtype.
                printout(
                    flog, 'eval avg class acc: %f' % (np.mean(
                        np.array(total_correct_class) /
                        np.array(total_seen_class, dtype=float))))
                printout(
                    flog,
                    '\tMean Total Loss: %f' % (total_loss_acc / num_batch))
                printout(
                    flog, '\tMean Segmentation Loss: %f' %
                    (seg_loss_acc / num_batch))

            for epoch in range(TRAINING_EPOCHES):
                printout(
                    flog, '\n>>> Training for the epoch %d/%d ...' %
                    (epoch, TRAINING_EPOCHES))
                train_one_epoch(epoch, sess, train_writer)
                # test_one_epoch(sess, test_writer)

                if epoch % 5 == 0:
                    cp_filename = saver.save(
                        sess,
                        os.path.join(LOG_DIR,
                                     'epoch_' + str(epoch + 1) + '.ckpt'))
                    printout(
                        flog,
                        'Successfully store the checkpoint model into ' +
                        cp_filename)

                flog.flush()
def main():
    """Train the segmentation network with DistributedDataParallel.

    Initializes the NCCL process group for ``cfg['local_rank']``, builds the
    model (optionally with synchronized batch norm), creates an SGD optimizer
    with separate learning rates for encoder and decoder, resumes from
    ``cfg['resume']`` or from the newest checkpoint in ``<run_dir>/model``,
    and trains up to ``cfg['epochs']``.  The main process saves a checkpoint
    every ``cfg['save_freq']`` epochs and deletes the one written two save
    intervals earlier.
    """
    global logger, cfg
    logger, cfg, run_dir = get_logger_and_parser()

    # Settings for distributed training.
    torch.cuda.set_device(cfg['local_rank'])
    dist.init_process_group('nccl', init_method='env://')

    # Set model & criterion.
    model_path = os.path.join(run_dir, 'model')
    criterion = nn.CrossEntropyLoss(ignore_index=cfg['ignore_label'])
    model = network.get_model(criterion, cfg['auxloss'],
                              cfg['auxloss_weight'])
    if main_process():
        logger.info(model)

    if cfg['sync_bn']:
        if main_process():
            logger.info('Convert batch norm layers to be sync.')
        # If you're using pytorch version below 1.3.0, we'll use
        # manually-implemented sync_bn.
        # More details please refer:
        # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch.
        try:
            model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        except AttributeError:
            from misc.sync_batchnorm.batchnorm import convert_model
            model = convert_model(model)

    # Always place the model on the current CUDA device.  Previously this
    # only happened inside the sync_bn branch, while DistributedDataParallel
    # below is given device_ids and so needs a GPU-resident module.  It is a
    # no-op if the model is already there.
    model = model.cuda()

    if main_process():
        logger.info(
            f'Segmentation Network Total Params number: {count_model_param(model) / 1E6}M'
        )
        check_dir_exists(model_path)

    # Set optimizer. Different learning rate for encoder and decoder.
    # params_list: 0 for encoder, 1 for decoder.
    param_list = [
        dict(params=model.encoder.parameters(), lr=cfg['base_lr']),
        dict(params=model.decoder.parameters(),
             lr=cfg['base_lr'] * cfg['decoder_lr_mul'])
    ]
    optimizer = optim.SGD(param_list,
                          lr=cfg['base_lr'],
                          momentum=cfg['momentum'],
                          weight_decay=cfg['weight_decay'])
    model = nn.parallel.DistributedDataParallel(
        model,
        device_ids=[cfg['local_rank']],
        output_device=cfg['local_rank'])

    def restore_from(path):
        # Load on the CPU first; load_state_dict copies the tensors into the
        # existing (GPU) parameters and optimizer state.
        checkpoint = torch.load(path, map_location=torch.device('cpu'))
        cfg['start_epoch'] = checkpoint['epoch']
        model.load_state_dict(
            safe_loader(checkpoint['state_dict'], use_model='multi'))
        optimizer.load_state_dict(checkpoint['optimizer'])

    # Resume.
    # TODO(xwd): This method will consume more GPU memory in main GPU. Try to reduce it.
    if cfg['resume'] is not None and os.path.isfile(cfg['resume']):
        restore_from(cfg['resume'])
    else:
        model_save_path = read_newest_model_path(model_path)
        if model_save_path is not None:
            restore_from(model_save_path)
            if main_process():
                logger.info(
                    f"Pretrained checkpoint loaded. Start from epoch {cfg['start_epoch']}."
                )

    # Set data loader & sampler.
    train_loader, train_sampler = get_dataloader()

    # Training.
    for epoch in range(cfg['start_epoch'], cfg['epochs']):
        if cfg['multiprocessing_distributed']:
            train_sampler.set_epoch(epoch)

        epoch_log = epoch + 1  # 1-based epoch number used in file names
        train_epoch(train_loader, model, optimizer, epoch)

        if main_process() and (epoch_log % cfg['save_freq'] == 0):
            # A malformed `logger.info(model_path, '<name>')` call used to
            # sit here: its message has no placeholder for the extra
            # argument.  The line below already logs the full path.
            filename = os.path.join(model_path,
                                    'checkpoint_{}.pth'.format(epoch_log))
            logger.info(f'Saving checkpoint to: {filename}')
            torch.save(
                {
                    'epoch': epoch_log,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict()
                }, filename)
            # Remove the checkpoint written two save intervals ago, if it
            # is still on disk.
            if epoch_log / cfg['save_freq'] > 2:
                deletename = os.path.join(
                    model_path,
                    f"checkpoint_{epoch_log - cfg['save_freq'] * 2}.pth")
                if os.path.exists(deletename):
                    os.remove(deletename)
def run(self):
    """Build the point-segmentation graph and train it, updating the view.

    Sub-sampled training data is created from the module-level
    ``original_data`` / ``original_ground_truth``.  After every mini-batch
    the predicted labels are pushed to ``input_poly`` through
    ``utils.update_segmentation`` and a progress line is emitted on
    ``self.signal_log``.

    NOTE(review): the outer ``while True`` never terminates and ``epoch`` is
    reset to 1 on every pass, so the final "Finished" signal is unreachable.
    This looks like a deliberate "train until the thread is stopped" mode
    (the bounded ``for`` is commented out) and is left unchanged - confirm.
    """
    self.signal_log.emit("Making training Data")

    # make training data (100 sub-samples)
    train_set = utils.make_subsample_data(original_data,
                                          original_ground_truth,
                                          size=100,
                                          sample_size=sample_size)
    # Only used by the disabled test pass at the end of each epoch.
    test_data = utils.make_subsample_data(original_data,
                                          original_ground_truth,
                                          sample_size=sample_size)

    self.signal_log.emit("Initializing Graph...")
    num_point = train_set[0]['input'].shape[0]
    input_tensor = tf.placeholder(tf.float32,
                                  shape=(None, num_point, 3),
                                  name="target_input")
    gt_tensor = tf.placeholder(tf.int32, shape=(None, num_point))
    is_training = tf.placeholder(tf.bool, name="target_isTraining")
    output_tensor = network.get_model(input_tensor, is_training)
    output_data_tensor = tf.argmax(output_tensor, 2, name="target_output")

    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=gt_tensor, logits=output_tensor)
    loss_op = tf.reduce_mean(loss)

    # L2 regularizer over all non-bias weights; built but currently not
    # added to the loss.
    regularizer = tf.add_n([
        tf.nn.l2_loss(v) for v in tf.trainable_variables()
        if 'bias' not in v.name
    ])
    # loss_op = loss_op + 0.001 * regularizer

    optimizer = tf.train.AdamOptimizer(1e-4, 0.5)
    train_op = optimizer.minimize(loss_op)

    self.signal_log.emit("Initializing weights...")
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())

    self.signal_log.emit("Start Training...")
    # for epoch in range(self.max_epoch):
    while True:
        epoch = 1

        # Save
        # save_path = "./weights/epoch_" + str(epoch)
        # builder = tf.saved_model.builder.SavedModelBuilder(save_path)
        # builder.add_meta_graph_and_variables(sess, ['ejshim'])
        # builder.save()

        idx = 0
        while idx < len(train_set):
            batch = train_set[idx:idx + batch_size]
            idx = idx + batch_size

            input_data = [sample['input'] for sample in batch]
            gt_data = [sample['gt'] for sample in batch]

            # A separate name is used for the fetched loss value so the
            # `loss` tensor above is not shadowed.
            output_data, loss_value, _ = sess.run(
                [output_data_tensor, loss_op, train_op],
                feed_dict={
                    input_tensor: input_data,
                    gt_tensor: gt_data,
                    is_training: True
                })

            log = str(epoch) + "/" + str(self.max_epoch -
                                         1) + ", Loss : " + str(loss_value)

            # Pair predictions with the samples actually in this batch.  The
            # last batch can be shorter than batch_size, and indexing with
            # range(batch_size) then raised IndexError.
            for sample, prediction in zip(batch, output_data):
                utils.update_segmentation(input_poly, prediction,
                                          sample['idx'])

            self.signal_log.emit(log)

        # run test
        # for data in test_data:
        #     output_data = sess.run(output_data_tensor, feed_dict={input_tensor:[data['input']], is_training:False})
        #     utils.update_segmentation(input_poly, output_data[0], data['idx'])
        # renderWindow.Render()

    self.signal_log.emit("Finished")
def _purify_labels(pc, point_labels, knn):
    """Smooth ``point_labels`` in place with a k-nearest-neighbour vote.

    For every point the ``knn`` nearest other points (squared Euclidean
    distance over all columns of ``pc``) vote.  The point takes the majority
    label when its own label is 0 or when all neighbours agree.  Labels are
    updated in place, so later points see already-smoothed neighbours; this
    matches the original inline loop.
    """
    for i in range(pc.shape[0]):
        order = np.argsort(np.sum((pc[i, :] - pc) ** 2, axis=1))
        j, votes = 0, []
        for _ in range(knn):
            if order[j] == i:
                j += 1  # skip the query point itself
            votes.append(point_labels[order[j]])
            j += 1
        majority = max(set(votes), key=votes.count)
        if point_labels[i] == 0 or len(set(votes)) == 1:
            point_labels[i] = majority


def predict():
    """Segment every file in ``TESTING_FILE_LIST`` and log IoU / accuracy.

    The graph is built on GPU ``gpu_to_use`` and restored from
    ``./train_results_6/trained_models``; the process exits if no checkpoint
    can be loaded.  Each file is voxelized, run through the network,
    optionally smoothed with a KNN vote (``purify``), and per-class counts
    are accumulated.  Per-class IoU, overall accuracy and mean IoU are
    written to ``<output_dir>/log-6.txt``.

    A class that occurs in neither ground truth nor prediction is logged with
    IoU ``nan`` and left out of the mean.  This case raised
    ZeroDivisionError before; results are unchanged whenever every class is
    present.
    """
    is_training = False

    with tf.device('/gpu:' + str(gpu_to_use)):
        pointgrid_ph, seg_label_ph = placeholder_inputs()
        is_training_ph = tf.placeholder(tf.bool, shape=())

        pred_seg = model.get_model(pointgrid_ph, is_training=is_training_ph)

    saver = tf.train.Saver()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True

    with tf.Session(config=config) as sess:
        os.makedirs(output_dir, exist_ok=True)

        # The context manager closes the log on every exit path, including
        # the early exit() below.
        with open(os.path.join(output_dir, 'log-6.txt'), 'w') as flog:
            # Restore variables from disk.
            ckpt_dir = './train_results_6/trained_models'
            if not load_checkpoint(ckpt_dir, sess):
                exit()

            os.makedirs('../data/ShapeNet/test-PointGrid', exist_ok=True)

            num_parts = model.SEG_PART
            gt_classes = [0] * num_parts
            positive_classes = [0] * num_parts
            true_positive_classes = [0] * num_parts

            for filelist in sorted(os.listdir(TESTING_FILE_LIST)):
                printout(flog, filelist)
                mat_content = np.load(
                    os.path.join(TESTING_FILE_LIST, filelist))
                # choice = np.random.choice(mat_content.shape[0], model.SAMPLE_NUM, replace=False)
                # mat_content = mat_content[choice, :]

                # Columns 0-2 and 3-5 are used as xyz and rgb (rgb scaled by
                # 1/255); the last column holds the integer label.
                xyz = mat_content[:, 0:3]
                rgb = mat_content[:, 3:6] / 255.0
                pc = np.concatenate((xyz, rgb), axis=1)
                labels = np.squeeze(mat_content[:, -1]).astype(int)

                seg_label = model.integer_label_to_one_hot_label(labels)
                pointgrid, pointgrid_label, index = model.pc2voxel(
                    pc, seg_label)
                pointgrid = np.expand_dims(pointgrid, axis=0)
                pointgrid_label = np.expand_dims(pointgrid_label, axis=0)
                feed_dict = {
                    pointgrid_ph: pointgrid,
                    seg_label_ph: pointgrid_label,
                    is_training_ph: is_training,
                }

                t1 = time.time()
                # pred_seg: of size B x N x N x N x (K+1) x NUM_PART_SEG
                pred_seg_val = sess.run(pred_seg, feed_dict=feed_dict)
                pred_seg_val = pred_seg_val[0, :, :, :, :, :]
                # pred_point_label: size n x 1
                pred_point_label = model.populateOneHotSegLabel(
                    pc, pred_seg_val, index)

                if purify:
                    _purify_labels(pc, pred_point_label, knn)
                t2 = time.time()
                print('one point cloud cost time:{}'.format(t2 - t1))

                for j in range(pred_point_label.shape[0]):
                    gt_l = int(labels[j])
                    pred_l = int(pred_point_label[j])
                    gt_classes[gt_l] += 1
                    positive_classes[pred_l] += 1
                    true_positive_classes[gt_l] += int(gt_l == pred_l)

                printout(
                    flog,
                    'gt_l:{},positive_classes:{},true_positive_classes:{}'.
                    format(gt_classes, positive_classes,
                           true_positive_classes))

            printout(flog, 'gt_l count:{}'.format(gt_classes))
            printout(flog,
                     'positive_classes count:{}'.format(positive_classes))
            printout(
                flog, 'true_positive_classes count:{}'.format(
                    true_positive_classes))

            iou_list = []
            for i in range(num_parts):
                # union = |gt| + |pred| - |gt and pred|
                union = (gt_classes[i] + positive_classes[i] -
                         true_positive_classes[i])
                if union:
                    iou_list.append(true_positive_classes[i] / float(union))
                else:
                    iou_list.append(float('nan'))
            printout(flog, 'IOU:{}'.format(iou_list))

            total_points = sum(positive_classes)
            if total_points:
                accuracy = sum(true_positive_classes) / float(total_points)
            else:
                accuracy = float('nan')
            printout(flog, 'ACC:{}'.format(accuracy))

            present = [iou for iou in iou_list if not np.isnan(iou)]
            if present:
                mean_iou = sum(present) / float(len(present))
            else:
                mean_iou = float('nan')
            printout(flog, 'mIOU:{}'.format(mean_iou))
config.DATA.NUM_CLASSESS = len(class_names)

# Fixed square input size.
opt.width = opt.height = 600

# FRCNN
divisor = 32
val_transform = A.Compose(
    [
        # A.SmallestMaxSize(600, p=1.0),  # resize so the short side is 600
        # A.PadIfNeeded(min_height=None, min_width=None, pad_height_divisor=divisor, pad_width_divisor=divisor, p=1.0),
        ToTensorV2(p=1.0),
    ],
    p=1.0)

# Resolve the model class by name, then build it and move it to the device.
Model = get_model(config.MODEL.NAME)
model = Model(opt).to(device=opt.device)

# Epoch reported by the loaded checkpoint, or 0 when nothing is loaded.
which_epoch = model.load(opt.load) if opt.load else 0

model.eval()

# img = cv2.imread('a.jpg')
# img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
# img /= 255.0  # scale to the range 0~1
#
# img_input = val_transform(image=img)['image']
def eval_single_gpu(worker, cfg, logger, eval_dataset, results_queue):
    """Run multi-scale segmentation evaluation for one worker.

    Each sample of ``eval_dataset`` is resized to every scale listed in
    ``cfg['scales']`` and passed to ``eval_in_scale``; the per-scale outputs
    are averaged, reduced with ``argmax`` over the class axis, and the
    statistics returned by ``VocCityscapesMetric.hist_info`` are pushed onto
    ``results_queue`` one sample at a time.

    Args:
        worker: Worker index. It is written to ``CUDA_VISIBLE_DEVICES`` and
            used as a tag in log messages; worker 0 also logs the model.
        cfg: Mapping read for ``'model_path'``, ``'classes'``, ``'scales'``,
            ``'base_size'``, ``'test_h'`` and ``'test_w'``.
        logger: Logger used for progress and model-size messages.
        eval_dataset: Dataset yielding ``(image, label)`` pairs.
            NOTE(review): each image is unpacked as a 3-axis array whose
            first two axes are height and width -- confirm with the dataset.
        results_queue: Queue receiving one dict per sample with the keys
            ``'hist'``, ``'labeled'`` and ``'correct'``.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = str(worker)

    # Normalisation statistics expressed on the 0-255 pixel scale.
    value_scale = 255
    mean = [channel * value_scale for channel in (0.485, 0.456, 0.406)]
    std = [channel * value_scale for channel in (0.229, 0.224, 0.225)]

    ruler = VocCityscapesMetric()
    data_time = AverageMeter()
    batch_time = AverageMeter()

    # Load dataset
    data_size = len(eval_dataset)
    eval_loader = DataLoader(eval_dataset,
                             batch_size=2,
                             shuffle=False,
                             num_workers=4,
                             pin_memory=True,
                             drop_last=False)

    # Get model checkpoint.
    # Deserialize onto the CPU: this process is meant to see a single GPU, so
    # storages tagged with another CUDA index could not be restored directly.
    # load_state_dict() below copies the values into the CUDA-resident model.
    checkpoint = torch.load(cfg['model_path'], map_location='cpu')

    # Load model.
    model = network.get_model()
    if worker == 0:
        logger.info(model)
    model = torch.nn.DataParallel(model).cuda()
    model.load_state_dict(
        safe_loader(checkpoint['state_dict'], use_model='multi'))
    logger.info(
        f'Worker[{worker}]: Segmentation Network Total Params number: {count_model_param(model) / 1E6}M'
    )
    model.eval()

    num_classes = cfg['classes']
    scales = cfg['scales']

    process_count = 0
    # Start the clock only now so the first timing sample does not include
    # checkpoint loading and model construction.
    end = time.time()
    # Evaluation never backpropagates, so gradient tracking is disabled.
    with torch.no_grad():
        for image, label in eval_loader:
            assert image.shape[0] == label.shape[0]
            data_time.update(time.time() - end)
            for j in range(image.shape[0]):
                cur_image = image[j].numpy()
                cur_label = label[j].numpy()
                h, w, _ = cur_image.shape

                # Accumulate the per-scale outputs, then average them.
                prediction = np.zeros((h, w, num_classes), dtype=float)
                for scale in scales:
                    # The longer image side becomes `long_size`; the other
                    # side is scaled by the same ratio.
                    long_size = round(scale * cfg['base_size'])
                    new_h = long_size
                    new_w = long_size
                    if h > w:
                        new_w = round(long_size / float(h) * w)
                    else:
                        new_h = round(long_size / float(w) * h)
                    image_scale = cv2.resize(cur_image, (new_w, new_h),
                                             interpolation=cv2.INTER_LINEAR)
                    if len(image_scale.shape) == 2:
                        # Unsqueeze for gray image.
                        image_scale = np.expand_dims(image_scale, 2)
                    prediction += eval_in_scale(model, image_scale,
                                                num_classes, cfg['test_h'],
                                                cfg['test_w'], h, w, mean, std)
                prediction /= len(scales)
                prediction = np.argmax(prediction, axis=2)

                batch_time.update(time.time() - end)
                end = time.time()
                process_count += 1
                if process_count % 10 == 0 or process_count == data_size:
                    logger.info(
                        '[Worker{}] Test: [{}/{}] '
                        'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                        'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}).'.
                        format(worker,
                               process_count,
                               data_size,
                               data_time=data_time,
                               batch_time=batch_time))

                # `cur_label` comes from `.numpy()` and is therefore never
                # None, so the statistics are always reported.
                hist_tmp, labeled_tmp, correct_tmp = ruler.hist_info(
                    num_classes, prediction, cur_label)
                results_queue.put({
                    'hist': hist_tmp,
                    'labeled': labeled_tmp,
                    'correct': correct_tmp
                })