def main(_):
    # Construct graph
    capsNet = CapsNet(is_training=True, routing=1, dcap_init=1)
    print('[+] Graph is constructed')

    config = tf.ConfigProto()
    # run on GPU 2 only
    config.gpu_options.visible_device_list = '2'
    # allocate at most 45% of GPU memory, growing as needed
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.45

    # Start session
    sv = tf.train.Supervisor(graph=capsNet.graph, logdir=conf.logdir, save_model_secs=0)
    with sv.managed_session(config=config) as sess:
        print('[+] Trainable variables')
        for tvar in capsNet.trainable_variables:
            print(tvar)

        print('[+] Training start')
        # Stage 1: optimize the CNN part against the margin loss
        for epoch in range(conf.num_epochs):
            if sv.should_stop():
                break
            losses = []
            # from tqdm import tqdm
            # for step in tqdm(range(capsNet.num_batch), total=capsNet.num_batch, ncols=70, leave=False, unit='b'):
            for step in range(capsNet.num_batch):
                _, mloss = sess.run([capsNet.cnn_op, capsNet.mloss])
                losses.append(mloss)
                if step % 100 == 0:
                    print("%s: epoch[%g/%g], step[%g/%g], mloss[%f]"
                          % (datetime.now(), epoch, conf.num_epochs, step, capsNet.num_batch, mloss))
            print(('[+] EPOCH %d : ' % epoch) + str(np.mean(losses)))
            gs = sess.run(capsNet.global_step)
            sv.saver.save(sess, conf.logdir + '/model_epoch_%02d_gs_%d_init' % (epoch, gs))

        # Stage 2: optimize the capsule part against its own loss
        for epoch in range(conf.num_epochs):
            if sv.should_stop():
                break
            losses = []
            for step in range(capsNet.num_batch):
                _, dloss = sess.run([capsNet.cap_op, capsNet.dloss])
                losses.append(dloss)
                if step % 100 == 0:
                    print("%s: epoch[%g/%g], step[%g/%g], dloss[%f]"
                          % (datetime.now(), epoch, conf.num_epochs, step, capsNet.num_batch, dloss))
            print(('[+] EPOCH %d : ' % epoch) + str(np.mean(losses)))
            gs = sess.run(capsNet.global_step)
            sv.saver.save(sess, conf.logdir + '/model_epoch_%02d_gs_%d_cap' % (epoch, gs))

    print("[+] Training is completed")
    return
def main(_):
    # Construct graph
    capsNet = CapsNet(is_training=True, routing=2)
    print('[+] Graph is constructed')

    config = tf.ConfigProto()
    # run on GPU 2 only
    config.gpu_options.visible_device_list = '2'
    # allocate at most 45% of GPU memory, growing as needed
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.45

    # Start session
    sv = tf.train.Supervisor(graph=capsNet.graph, logdir=conf.logdir, save_model_secs=0)
    with sv.managed_session(config=config) as sess:
        print('[+] Trainable variables')
        for tvar in capsNet.train_vars:
            print(tvar)

        print('[+] Training start')
        for epoch in range(conf.num_epochs):
            if sv.should_stop():
                break
            losses = []
            # from tqdm import tqdm
            # for step in tqdm(range(capsNet.num_batch), total=capsNet.num_batch, ncols=70, leave=False, unit='b'):
            for step in range(capsNet.num_batch):
                _, _, loss, rloss, mloss = sess.run(
                    [capsNet.train_op, capsNet.routing_op,
                     capsNet.loss, capsNet.routing_loss, capsNet.mloss])
                losses.append(loss)
                if step % 100 == 0:
                    print("%s: epoch[%g/%g], step[%g/%g], loss[%f], rloss[%f], mloss[%f]"
                          % (datetime.now(), epoch, conf.num_epochs, step,
                             capsNet.num_batch, loss, rloss, mloss))
            print(('[+] EPOCH %d : ' % epoch) + str(np.mean(losses)))
            gs = sess.run(capsNet.global_step)
            sv.saver.save(sess, conf.logdir + '/model_epoch_%02d_gs_%d' % (epoch, gs))

        # Evaluate classification accuracy on the test set with the trained model
        teX, teY = load_mnist(conf.dataset, is_training=False)
        classification_acc = []
        for i in range(10000 // conf.batch_size):
            start = i * conf.batch_size
            end = start + conf.batch_size
            # Classification
            cls_result = sess.run(capsNet.preds, {capsNet.x: teX[start:end]})
            cls_answer = teY[start:end]
            cls_acc = np.mean(np.equal(cls_result, cls_answer).astype(np.float32))
            classification_acc.append(cls_acc)
        # Print classification accuracy
        print('classification_acc : ' + str(np.mean(classification_acc) * 100))

    print("[+] Training is completed")
    return
def encode(data, model_ckpt, save_name):
    num_test = data.shape[0]
    imgs = tf.placeholder(
        tf.float32, [args.batch_size, args.height, args.width, args.num_channel])
    model = CapsNet(imgs, None, 'capsnet')
    model.build()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, model_ckpt)
        encode = np.zeros([num_test, args.num_class, 16])
        for i in range(num_test // args.batch_size):
            img_bch = data[i * args.batch_size:(i + 1) * args.batch_size, :, :, :]
            encode_bch = sess.run(model.caps2, feed_dict={imgs: img_bch})
            assert encode_bch.shape == (args.batch_size, args.num_class, 16, 1)
            encode[i * args.batch_size:(i + 1) * args.batch_size, :, :] = \
                np.squeeze(encode_bch, [-1])
        np.save(save_name, encode)
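# Hypothetical usage sketch for encode() above (not part of the original snippet):
# it assumes `args` has already been parsed, that 'test_img.npy' holds images shaped
# [N, args.height, args.width, args.num_channel], and that a checkpoint exists at the
# given path; encode() then writes an [N, args.num_class, 16] array of caps2 vectors.
if __name__ == '__main__':
    test_imgs = np.load('test_img.npy')
    encode(test_imgs,
           model_ckpt='./checkpoints/capsnet-30',
           save_name='test_caps2_encodings.npy')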
def main(_):
    # Load graph
    capsNet = CapsNet(is_training=False)
    print('[+] Graph is constructed')

    # Load test data
    teX, teY = load_mnist(conf.dataset, is_training=False)

    # Start session
    with capsNet.graph.as_default():
        sv = tf.train.Supervisor(logdir=conf.logdir)
        with sv.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            # Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(conf.logdir))
            print('[+] Graph is restored from ' + conf.logdir)

            # Make results directory
            if not os.path.exists('results'):
                os.mkdir('results')

            reconstruction_err = []
            classification_acc = []
            for i in range(10000 // conf.batch_size):
                start = i * conf.batch_size
                end = start + conf.batch_size

                # Reconstruction
                recon_imgs = sess.run(capsNet.decoded, {capsNet.x: teX[start:end]})
                recon_imgs = np.reshape(recon_imgs, (conf.batch_size, -1))
                origin_imgs = np.reshape(teX[start:end], (conf.batch_size, -1))
                squared = np.square(recon_imgs - origin_imgs)
                reconstruction_err.append(np.mean(squared))
                if i % 5 == 0:
                    imgs = np.reshape(recon_imgs, (conf.batch_size, 28, 28, 1))
                    size = 6
                    save_images(imgs[0:size * size, :], [size, size], 'results/test_%03d.png' % i)

                # Classification
                cls_result = sess.run(capsNet.preds, {capsNet.x: teX[start:end]})
                cls_answer = teY[start:end]
                cls_acc = np.mean(np.equal(cls_result, cls_answer).astype(np.float32))
                classification_acc.append(cls_acc)

            # Print classification accuracy & reconstruction error
            print('reconstruction_err : ' + str(np.mean(reconstruction_err)))
            print('classification_acc : ' + str(np.mean(classification_acc) * 100))
def main(_):
    # Load graph
    capsNet = CapsNet(is_training=False, routing=2)
    print('[+] Graph is constructed')

    # Load test data
    teX, teY = load_mnist(conf.dataset, is_training=False)

    config = tf.ConfigProto(allow_soft_placement=True)
    # run on GPU 3 only
    config.gpu_options.visible_device_list = '3'
    # allocate at most 45% of GPU memory, growing as needed
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.45

    # Start session
    with capsNet.graph.as_default():
        sv = tf.train.Supervisor(logdir=conf.logdir)
        with sv.managed_session(config=config) as sess:
            # Restore parameters
            checkpoint_path = tf.train.latest_checkpoint(conf.logdir)
            sv.saver.restore(sess, checkpoint_path)
            print('[+] Graph is restored from ' + checkpoint_path)

            # Make results directory
            if not os.path.exists('results'):
                os.mkdir('results')

            classification_acc = []
            for i in range(10000 // conf.batch_size):
                start = i * conf.batch_size
                end = start + conf.batch_size

                # Classification
                cls_result = sess.run(capsNet.preds, {capsNet.x: teX[start:end]})
                cls_answer = teY[start:end]
                cls_acc = np.mean(np.equal(cls_result, cls_answer).astype(np.float32))
                classification_acc.append(cls_acc)

            # Print classification accuracy
            print('classification_acc : ' + str(np.mean(classification_acc) * 100))
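# Standard TF1 entry point for the main(_) scripts above; tf.app.run() parses flags and
# then calls main. The conf object and its flags are assumed to be defined elsewhere in the repo.
if __name__ == '__main__':
    tf.app.run()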
def get_network(opts):
    if opts.dataset == "mnist":
        capsnet = CapsNet(reconstruction_type=opts.decoder,
                          routing_iterations=opts.routing_iterations,
                          batchnorm=opts.batch_norm,
                          loss=opts.loss_type,
                          leaky_routing=opts.leaky_routing)
    elif opts.dataset == "small_norb":
        if opts.decoder == "Conv":
            opts.decoder = "Conv32"
        capsnet = CapsNet(reconstruction_type=opts.decoder,
                          imsize=32,
                          num_classes=5,
                          routing_iterations=opts.routing_iterations,
                          primary_caps_gridsize=8,
                          num_primary_capsules=32,
                          batchnorm=opts.batch_norm,
                          loss=opts.loss_type,
                          leaky_routing=opts.leaky_routing)
    elif opts.dataset == "cifar10":
        if opts.decoder == "Conv":
            opts.decoder = "Conv32"
        capsnet = CapsNet(reconstruction_type=opts.decoder,
                          imsize=32,
                          routing_iterations=opts.routing_iterations,
                          primary_caps_gridsize=8,
                          img_channels=3,
                          batchnorm=opts.batch_norm,
                          num_primary_capsules=32,
                          loss=opts.loss_type,
                          leaky_routing=opts.leaky_routing)

    if opts.use_gpu:
        capsnet.cuda()
    if opts.gpu_ids:
        capsnet = GPUParallell(capsnet, opts.gpu_ids)
        print("Training on GPU IDS:", opts.gpu_ids)
    return capsnet
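# Illustrative call of get_network() above. The attribute names mirror exactly what the
# function reads from `opts`; the concrete values here are assumptions, not taken from the repo.
from argparse import Namespace

opts = Namespace(dataset="cifar10", decoder="Conv", routing_iterations=3,
                 batch_norm=True, loss_type="margin", leaky_routing=False,
                 use_gpu=False, gpu_ids=[])
capsnet = get_network(opts)  # returns a CapsNet configured for 32x32 RGB input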
def main(_):
    tf.logging.info('Loading Graph...')
    model = CapsNet()
    tf.logging.info('Graph loaded')

    sv = tf.train.Supervisor(graph=model.graph, logdir=cfg.logdir, save_model_secs=0)

    if not cfg.is_training:
        # _ = evaluation(model, sv)
        _ = test(model, sv)
    else:
        tf.logging.info('Start training...')
        loss, acc = train(model, sv)
        tf.logging.info('Training done')
        if cfg.save:
            test_acc = evaluation(model, sv)
def train(args, from_epoch=0):
    if not os.path.exists(os.path.dirname(args.summary_path)):
        os.mkdir(os.path.dirname(args.summary_path))
    acc_file = open(args.summary_path, 'a')

    tf.set_random_seed(100)
    imgs = tf.placeholder(tf.float32, [args.batch_size, args.height, args.width, args.num_channel])
    labels_raw = tf.placeholder(tf.float32, [args.batch_size])
    labels = tf.one_hot(tf.cast(labels_raw, tf.int32), args.num_class)

    model = CapsNet(imgs, labels, 'capsnet')
    model.build()
    model.calc_loss()

    pred = tf.argmax(tf.reduce_sum(tf.square(model.caps2), axis=[2, 3]), axis=1)
    pred = tf.cast(pred, tf.float32)
    correct_pred = tf.cast(tf.equal(pred, labels_raw), tf.float32)
    accuracy = tf.reduce_mean(correct_pred)
    loss = model.total_loss
    optimizer = tf.train.AdamOptimizer(0.0001)
    train_op = optimizer.minimize(loss)

    ######### will run out of memory
    ## val_imgs = tf.placeholder(tf.float32, [10000, args.height, args.width, args.num_channel])
    ## val_labels_raw = tf.placeholder(tf.float32, [10000])
    ## val_labels = tf.one_hot(tf.cast(val_labels_raw, tf.int32), 10)
    ## val_model = CapsNet(val_imgs, val_labels, name="capsnet")
    ## val_model.build()
    ## val_model.calc_loss()
    ## val_pred = tf.argmax(tf.reduce_sum(tf.square(val_model.caps2), axis=[2, 3]), axis=1)
    ## val_pred = tf.cast(val_pred, tf.float32)
    ## correct_val_pred = tf.cast(tf.equal(val_pred, val_labels_raw), tf.float32)
    ## val_accuracy = tf.reduce_mean(correct_val_pred)
    ## val_loss = val_model.total_loss

    #################### for mnist data
    ## train_img, train_label = prepare_data(args.data_path, 'mnist_train.csv', 60000)
    ## train_img = np.reshape(train_img, [-1, 28, 28, 1])
    ## num_train = train_img.shape[0]
    ## test_img, test_label = prepare_data(args.data_path, 'mnist_test.csv', 10000)
    ## test_img = np.reshape(test_img, [-1, 28, 28, 1])

    ############ for face96 data
    ## data = np.load("face_96.npy")
    ## total_label = data[:, 0]
    ## total_img = data[:, 1:] / 255
    ## #total_img, total_label = prepare_data('face_96.csv', length)
    ## test_img = total_img[[i for i in range(0, length, 5)], :]
    ## test_label = total_label[[i for i in range(0, length, 5)]]
    ## test_img = np.reshape(test_img, [-1, 196, 196, 3])
    ## num_test = test_img.shape[0]
    ## train_idx = list(set(range(length)) - set(range(0, length, 5)))
    ## train_img = total_img[train_idx, :]
    ## train_label = total_label[train_idx]
    ## train_img = np.reshape(train_img, [-1, 196, 196, 3])
    ## num_train = train_img.shape[0]
    ##
    ## train_img_resize = np.zeros([length, 28, 28, 3])
    ## for i in range(num_train):
    ##     train_img_resize[i, :, :, 0] = resize(train_img[i, :, :, 0], [28, 28])
    ##     train_img_resize[i, :, :, 1] = resize(train_img[i, :, :, 1], [28, 28])
    ##     train_img_resize[i, :, :, 2] = resize(train_img[i, :, :, 2], [28, 28])
    ## test_img_resize = np.zeros([length, 28, 28, 3])
    ## for i in range(num_test):
    ##     test_img_resize[i, :, :, 0] = resize(test_img[i, :, :, 0], [28, 28])
    ##     test_img_resize[i, :, :, 1] = resize(test_img[i, :, :, 1], [28, 28])
    ##     test_img_resize[i, :, :, 2] = resize(test_img[i, :, :, 2], [28, 28])
    ## train_img = train_img_resize
    ## test_img = test_img_resize

    ################ for landmark data, don't need this one
    ####### idea is using getimagebatch and keep track of
    ########## label using index of the labels.npy
    ## #train_labels = np.load("labels_train_8249.npy")
    ## num_labels = 50
    ## train_size = 0.9
    ## test_size = 0.1
    ## data_meta = np.load(args.metadata_path)
    ## # print(data_meta.shape)  (1225029, 3)
    ## (data_urls_train, labels_train, imgid_train, data_urls_test,
    ##  labels_test, imgid_test) = Utils_Data.FormatDataset(data_meta, num_labels=num_labels,
    ##                                                      train_size=train_size, test_size=test_size)
    ## num_train = data_urls_train.size
    ## num_test = data_urls_test.size
    ## #print(set(labels_train), set(labels_test))  ## 50 classes
    ## labels_train_set = list(set(labels_train))
    ## labels_train_dict = {labels_train_set[i]: i for i in range(len(labels_train_set))}
    ## labels_train = [labels_train_dict[labels_train[i]] for i in range(len(labels_train))]
    ## labels_train = np.array(labels_train)
    ## #print(labels_train[:100])
    ## np.random.seed(100)
    ## np.random.shuffle(data_urls_train)
    ## np.random.seed(100)
    ## np.random.shuffle(labels_train)
    ## np.random.seed(100)
    ## np.random.shuffle(imgid_train)
    ## in_memory_url = data_urls_train[:30000]
    ## in_memory_labels = labels_train[:30000]
    ## in_memory_imgid = imgid_train[:30000]
    ## curr_index = 0
    ## img_bch, curr_index, label_index = Utils_Data.GetImageBatch(urls=in_memory_url,
    ##     start_index=curr_index, imgids=in_memory_imgid, batch_size=30000,
    ##     path='../images', n_rows=28, n_cols=28)
    ## print(img_bch.shape, label_index[:100])
    ## raise

    ###### load train and test data, trainsize ~30000, testsize ~34000
    test_img = np.load(os.path.join(args.data_path, "train_img_9.npy"))
    test_label = np.load(os.path.join(args.data_path, "train_label_9.npy"))
    num_test = test_img.shape[0]

    saver = tf.train.Saver()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session() as sess:
        if from_epoch == 0:
            sess.run(tf.global_variables_initializer())
        else:
            saver.restore(sess, args.CheckPointPath + "-{}".format(from_epoch))

        for epoch in range(from_epoch, from_epoch + args.epochs):
            if epoch % 10 == 0:
                print("-----------------loading data--------------------")
                train_img = np.load(os.path.join(args.data_path, "train_img_{}.npy".format(epoch // 10 % 9)))
                print("check: ", train_img.max(), train_img.shape)
                train_label = np.load(os.path.join(args.data_path, "train_label_{}.npy".format(epoch // 10 % 9)))
                print("check: ", train_label.max(), train_label.shape)
                num_train = train_img.shape[0]
                loops = num_train // args.batch_size

            curr_index = 0
            for i in range(loops):
                tic = time.time()
                #### get input mini-batch
                img_bch = train_img[(i * args.batch_size):((i + 1) * args.batch_size), :, :, :]
                label_bch = train_label[(i * args.batch_size):((i + 1) * args.batch_size)]
                ####### this method needs IO from disk, slow
                # img_bch, curr_index, label_index = Utils_Data.GetImageBatch(urls=data_urls_train,
                #     start_index=curr_index, imgids=imgid_train, batch_size=args.batch_size,
                #     path='../images', n_rows=28, n_cols=28)
                # print(curr_index, label_index)
                # img_bch = np.array(img_bch / 255 * 2 - 2, np.float32)
                # label_bch = np.array(labels_train[label_index], np.float32)
                #### train the model and record the current training accuracy
                _, cur_accuracy, cur_loss = sess.run([train_op, accuracy, loss],
                                                     feed_dict={imgs: img_bch, labels_raw: label_bch})
                print("Epoch: {} BATCH: {} Time: {:.4f} Loss:{:.4f} Accu: {:.4f}".format(
                    epoch, i, time.time() - tic, cur_loss, cur_accuracy))

            if epoch % 30 == 0:
                saver.save(sess, args.CheckPointPath, global_step=epoch)

            #### get the validation accuracy
            acc = []
            for j in range(num_test // args.batch_size // 10):
                val_img_bch = test_img[(j * args.batch_size):((j + 1) * args.batch_size), :, :, :]
                val_label_bch = test_label[(j * args.batch_size):((j + 1) * args.batch_size)]
                val_acc = sess.run(accuracy, feed_dict={imgs: val_img_bch, labels_raw: val_label_bch})
                acc.append(val_acc)
            print("Epoch: {} TestAccu: {:.4f}".format(epoch, np.mean(acc)))
            try:
                acc_file.write("Epoch: {} TestAccu: {:.4f}\n".format(epoch, np.mean(acc)))
                acc_file.flush()
            except:
                continue

    acc_file.close()
data.normalize(stats)
data.c_in, data.c_out = config.MODEL.inputchannels, 2  ### seg_model Caps
print('data done')

run_dir = config.LEARNER.runsave_dir + '/' + name
os.makedirs(run_dir, exist_ok=True)

exp_name = 'seg_model_caps'
mlflow_CB = partial(MLFlowTracker,
                    exp_name=exp_name,
                    uri='file:/workspace/oct_ca_seg/runsaves/fastai_experiments/mlruns/',
                    params=config.config_dict,
                    log_model=True,
                    nb_path="/workspace/oct_ca_seg/oct/02_caps.ipynb")

deepCap = CapsNet(config.MODEL).cuda()
# print(config_dict)
# print('debugging', data.one_batch()[0].shape, deepCap(data.one_batch()[0].cuda()), config.MODEL.input_images)

learner = Learner(data=data, model=deepCap, metrics=metrics, callback_fns=mlflow_CB)
print('starting run')
with mlflow.start_run():
    learner.fit_one_cycle(config.LEARNER.epochs, slice(config.LEARNER.lr), pct_start=config.LEARNER.pct_start)
    MLPY.save_model(learner.model, run_dir + '/model')
    learner.save(Path(run_dir) / 'learner')
dataloader = DataLoader(pathName, matName, PATCH_SIZE, RATIO, AUGMENT_RATIO)
trainPatch, trainSpectrum, trainLabel = dataloader.loadTrainData()
testPatch, testSpectrum, testLabel = dataloader.loadTestData()
allLabeledPatch, allLabeledSpectrum, allLabeledLabel, allLabeledIndex = dataloader.loadAllLabeledData()

w = tf.placeholder(shape=[None, dataloader.bands, 1], dtype=tf.float32)
x = tf.placeholder(shape=[None, dataloader.patchSize, dataloader.patchSize, dataloader.bands], dtype=tf.float32)
y = tf.placeholder(shape=[None, dataloader.numClasses], dtype=tf.float32)
k = tf.placeholder(dtype=tf.float32)

if args.model == 1:
    pred = DCCapsNet(x, w, k, dataloader.numClasses, FIRST_LAYER, SECOND_LAYER)
    print("USING DCCAPS***************************************")
elif args.model == 2:
    pred = CapsNet(x, dataloader.numClasses)
    print("USING CAPS*****************************************")
else:
    pred = conv_net(x, dataloader.numClasses)
    print("USING CONV*****************************************")

pred = tf.divide(pred, tf.reduce_sum(pred, 1, keep_dims=True))
loss = tf.reduce_mean(cl.losses.margin_loss(y, pred))
optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE).minimize(loss)
correctPredictions = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correctPredictions, "float"))
init = tf.global_variables_initializer()
saver = tf.train.Saver()

with tf.Session() as sess:
    leastLoss = 100.0
                    type=int, default=16, metavar='N',
                    help='num of workers to fetch data')
parser.add_argument('--multi-gpu', default=False,
                    help='if use multiple gpu(default: False)')
args = parser.parse_args()

use_cuda = not args.disable_cuda and torch.cuda.is_available()

A, B, C, D, E, r = 32, 32, 32, 32, args.num_classes, args.r  # a classic CapsNet

if args.multi_gpu:
    print("Enable multi gpus")
    model = nn.parallel.DataParallel(CapsNet(args.batch_size, A, B, C, D, E, r))
else:
    model = CapsNet(args.batch_size, A, B, C, D, E, r)

meter_loss = tnt.meter.AverageValueMeter()
meter_accuracy = tnt.meter.ClassErrorMeter(accuracy=True)
confusion_meter = tnt.meter.ConfusionMeter(args.num_classes, normalized=True)

setting_logger = VisdomLogger('text', opts={'title': 'Settings'}, env=args.env_name)
train_loss_logger = VisdomPlotLogger('line', opts={'title': 'Train Loss'}, env=args.env_name)
train_error_logger = VisdomPlotLogger('line',