def main():
    """Resume FCN training from the epoch-60 pickle snapshot.

    Loads train/test file lists, restores the model from
    'result/myfcn_epoch_60.model', and continues MomentumSGD training
    from epoch 61 through EPOCHS, logging, snapshotting, and testing on
    the configured schedules.  Relies on module-level configuration
    (train_fn, test_fn, train_data_size, EPOCHS, LEARNING_RATE,
    DECAY_FACTOR, SNAPSHOT_EPOCHS, TEST_EPOCHS, PICKLE_DUMP_PATH, GPU).
    """
    logging.basicConfig(filename='result/log2', level=logging.DEBUG)

    #_/_/_/ load dataset _/_/_/
    # Use context managers so the list files are closed deterministically
    # (the original leaked the handles returned by bare open()).
    with open(train_fn) as f:
        train_dl = np.array([l.strip() for l in f.readlines()])
    with open(test_fn) as f:
        test_dl = np.array([l.strip() for l in f.readlines()])
    mini_batch_loader = MiniBatchLoader(IMAGE_DIR_PATH, TRAIN_BATCH_SIZE,
                                        MyFcn.IN_SIZE)

    #_/_/_/ load model _/_/_/
    cuda.get_device(GPU).use()
    # NOTE(review): pickle.load is unsafe on untrusted files; fine for a
    # self-produced snapshot, but never point this at external input.
    with open('result/myfcn_epoch_60.model', 'rb') as f:
        myfcn = pickle.load(f)
    myfcn = myfcn.to_gpu()

    #_/_/_/ setup _/_/_/
    optimizer = optimizers.MomentumSGD(lr=LEARNING_RATE)
    optimizer.setup(myfcn)

    #_/_/_/ training _/_/_/
    # Epochs 1-60 are already in the loaded snapshot, hence the 61 start.
    for epoch in range(61, EPOCHS + 1):
        st = time.time()
        sys.stdout.flush()
        indices = np.random.permutation(train_data_size)
        sum_accuracy = 0
        sum_loss = 0
        for i in range(0, train_data_size, TRAIN_BATCH_SIZE):
            r = indices[i:i + TRAIN_BATCH_SIZE]
            raw_x, raw_y = mini_batch_loader.load_data(train_dl[r])
            x = Variable(cuda.to_gpu(raw_x))
            y = Variable(cuda.to_gpu(raw_y))
            myfcn.zerograds()
            myfcn.train = True
            loss = myfcn(x, y)
            loss.backward()
            # Fix: fail BEFORE applying the update so a NaN loss never
            # corrupts the weights (original checked after update()).
            if math.isnan(loss.data):
                raise RuntimeError("ERROR in main: loss.data is nan!")
            optimizer.update()
            sum_loss += loss.data * TRAIN_BATCH_SIZE
            sum_accuracy += myfcn.accuracy * TRAIN_BATCH_SIZE
        end = time.time()
        msg = "epoch:{x} training loss:{a}, accuracy {b}, time {c}".format(
            x=epoch, a=sum_loss / train_data_size,
            b=sum_accuracy / train_data_size, c=end - st)
        print(msg)
        logging.info(msg)
        draw_loss_curve('result/log2', 'result/shape/loss2.png')
        sys.stdout.flush()
        optimizer.lr *= DECAY_FACTOR
        # snapshot on the configured schedule (and always after epoch 1)
        if epoch == 1 or epoch % SNAPSHOT_EPOCHS == 0:
            with open(PICKLE_DUMP_PATH.format(i=epoch), "wb") as f:
                pickle.dump(myfcn, f)
        #_/_/_/ testing _/_/_/
        if epoch == 1 or epoch % TEST_EPOCHS == 0:
            test(mini_batch_loader, myfcn, test_dl)
# NOTE(review): fragment of a training driver's tail — trans, args, input_q,
# data_q, model, epoch, result_dir, log_fn come from the enclosing scope.
# Indentation reconstructed from a whitespace-collapsed source.
data_loader = Process(target=load_data, args=(trans, args, input_q, data_q))
data_loader.start()
# eval
st = time.time()
# `eval` here is a project-defined evaluation function that shadows the
# builtin eval().
sum_loss = eval(test_dl, N_test, model, trans, args, input_q, data_q)
msg = get_log_msg('test', epoch, sum_loss, N_test, args, st)
logging.info(msg)
print('\n%s' % msg)
# snapshot the model on the configured schedule (and always after epoch 1)
if epoch == 1 or epoch % args.snapshot == 0:
    model_fn = '%s/%s_epoch_%d.chainermodel' % (
        result_dir, args.prefix, epoch + args.epoch_offset)
    pickle.dump(model, open(model_fn, 'wb'), -1)
draw_loss_curve(log_fn, '%s/log.jpg' % result_dir)
# quit data loading thread
input_q.put(None)
data_loader.join()
# final snapshot after training
model_fn = '%s/%s_epoch_%d.chainermodel' % (
    result_dir, args.prefix, epoch + args.epoch_offset)
pickle.dump(model, open(model_fn, 'wb'), -1)
# NOTE(review): this second sentinel put() + join() duplicates the shutdown
# already performed above — join() on an exited process is a no-op, but this
# looks like leftover copy/paste; confirm before removing.
input_q.put(None)
data_loader.join()
logging.info(time.strftime('%Y-%m-%d_%H-%M-%S'))
create_result_dir(args) # create model and optimizer model, optimizer = get_model_optimizer(args) train_dl, test_dl = load_dataset(args) N, N_test = len(train_dl), len(test_dl) logging.info('# of training data:{}'.format(N)) logging.info('# of test data:{}'.format(N_test)) logging.info(time.strftime('%Y-%m-%d_%H-%M-%S')) logging.info('start training...') # learning loop for epoch in range(args.epoch_offset + 1, args.epoch + 1): # train sum_loss = one_epoch(args, model, optimizer, epoch, train_dl, True) logging.info('epoch:{}\ttraining loss:{}'.format(epoch, sum_loss / N)) if epoch == 1 or epoch % args.test_freq == 0: sum_loss = one_epoch(args, model, optimizer, epoch, test_dl, False) logging.info('epoch:{}\ttest loss:{}'.format( epoch, sum_loss / N_test)) if epoch == 1 or epoch % args.snapshot == 0: model_fn = '{}/epoch-{}.model'.format(args.result_dir, epoch) opt_fn = '{}/epoch-{}.state'.format(args.result_dir, epoch) serializers.save_hdf5(model_fn, model) serializers.save_hdf5(opt_fn, optimizer) draw_loss_curve(args.log_fn, '{}/log.png'.format(args.result_dir))
create_result_dir(args) # create model and optimizer model, optimizer = get_model_optimizer(args) train_dl, test_dl = load_dataset(args) N, N_test = len(train_dl), len(test_dl) logging.info('# of training data:{}'.format(N)) logging.info('# of test data:{}'.format(N_test)) # learning loop for epoch in range(args.epoch_offset + 1, args.epoch + 1): # train sum_loss = one_epoch(args, model, optimizer, epoch, train_dl, True) logging.info('epoch: {}\ttraining loss: {}'.format( epoch, sum_loss / N)) if epoch == 1 or epoch % args.snapshot == 0: model_fn = '{}/epoch-{}.model'.format(args.result_dir, epoch) opt_fn = '{}/epoch-{}.state'.format(args.result_dir, epoch) serializers.save_npz(model_fn, model) serializers.save_npz(opt_fn, optimizer) if epoch == 1 or epoch % args.test_freq == 0: logging.info('-' * 20) sum_loss = one_epoch(args, model, optimizer, epoch, test_dl, False) logging.info('epoch: {}\ttest loss: {}'.format( epoch, sum_loss / N_test)) draw_loss_curve(args.log_fn, '{}/log.png'.format(args.result_dir)) logging.info('=' * 20)
def one_epoch(args, model, optimizer, data, label, epoch, train):
    """Run one pass over `data` (training when `train` is True, else eval).

    Minibatches are produced asynchronously by an `augmentation` worker
    process and consumed from a queue until a None sentinel arrives.
    Side effects: logs progress, dumps the computational graph on the very
    first training batch, and saves HDF5 snapshots on the snapshot
    schedule.  Returns None.
    """
    model.train = train
    # Array module: CuPy on GPU, NumPy on CPU.
    xp = cuda.cupy if args.gpu >= 0 else np
    # for parallel augmentation
    aug_queue = Queue()
    aug_worker = Process(target=augmentation,
                         args=(args, aug_queue, data, label, train))
    aug_worker.start()
    logging.info('data loading started')
    sum_accuracy = 0
    sum_loss = 0
    num = 0  # samples consumed so far
    while True:
        datum = aug_queue.get()
        if datum is None:
            # sentinel from the augmentation worker: epoch finished
            break
        x, t = datum
        # old-Chainer volatile flag: build a backprop graph only in training
        volatile = 'off' if train else 'on'
        x = Variable(xp.asarray(x), volatile=volatile)
        t = Variable(xp.asarray(t), volatile=volatile)
        if train:
            # optimizer.update runs forward+backward via the loss function
            optimizer.update(model, x, t)
            if epoch == 1 and num == 0:
                # dump the computational graph once, for debugging
                with open('{}/graph.dot'.format(args.result_dir), 'w') as o:
                    g = computational_graph.build_computational_graph(
                        (model.loss, ), remove_split=True)
                    o.write(g.dump())
            sum_loss += float(model.loss.data) * t.data.shape[0]
            sum_accuracy += float(model.accuracy.data) * t.data.shape[0]
            num += t.data.shape[0]
            logging.info('{:05d}/{:05d}\tloss:{:.3f}\tacc:{:.3f}'.format(
                num, data.shape[0], sum_loss / num, sum_accuracy / num))
        else:
            # eval: average predictions over the batch (presumably augmented
            # crops of a single sample — confirm against `augmentation`)
            pred = model(x, t).data
            pred = pred.mean(axis=0)
            acc = int(pred.argmax() == t.data[0])
            sum_accuracy += acc
            num += 1
            logging.info('{:05d}/{:05d}\tacc:{:.3f}'.format(
                num, data.shape[0], sum_accuracy / num))
        del x, t
    if train and (epoch == 1 or epoch % args.snapshot == 0):
        model_fn = '{}/epoch-{}.model'.format(args.result_dir, epoch)
        opt_fn = '{}/epoch-{}.state'.format(args.result_dir, epoch)
        serializers.save_hdf5(model_fn, model)
        serializers.save_hdf5(opt_fn, optimizer)
    if train:
        logging.info('epoch:{}\ttrain loss:{}\ttrain accuracy:{}'.format(
            epoch, sum_loss / data.shape[0], sum_accuracy / data.shape[0]))
    else:
        # NOTE(review): sum_loss is never updated on the eval path, so the
        # logged test loss is always 0 — confirm whether that is intended.
        logging.info('epoch:{}\ttest loss:{}\ttest accuracy:{}'.format(
            epoch, sum_loss / data.shape[0], sum_accuracy / data.shape[0]))
        draw_loss.draw_loss_curve('{}/log.txt'.format(args.result_dir),
                                  '{}/log.png'.format(args.result_dir),
                                  epoch)
    aug_worker.join()
    logging.info('data loading finished')
# NOTE(review): fragment of a training script body — n_epoch, optimizer,
# model, result_dir, log_fn, train(), validate() come from the enclosing
# scope.  Indentation reconstructed from a whitespace-collapsed source; the
# lr-decay check is assumed nested under the MomentumSGD branch — confirm.
batchsize = args.batchsize  # NOTE(review): appears unused in this fragment
for epoch in range(1, n_epoch + 1):
    # train
    if args.opt == 'MomentumSGD':
        print('learning rate:', optimizer.lr)
        # step-decay the learning rate on the configured frequency
        if epoch % args.lr_decay_freq == 0:
            optimizer.lr *= args.lr_decay_ratio
    sum_loss, sum_accuracy = train(train_data, train_labels, N,
                                   model, optimizer, trans, args)
    msg = 'epoch:{:02d}\ttrain mean loss={}, accuracy={}'.format(
        epoch + args.epoch_offset, sum_loss / N, sum_accuracy / N)
    logging.info(msg)
    print('\n%s' % msg)
    # validate
    sum_loss, sum_accuracy = validate(test_data, test_labels, N_test,
                                      model, args)
    msg = 'epoch:{:02d}\ttest mean loss={}, accuracy={}'.format(
        epoch + args.epoch_offset, sum_loss / N_test,
        sum_accuracy / N_test)
    logging.info(msg)
    print('\n%s' % msg)
    # pickle the model on the snapshot schedule (and always after epoch 1)
    if epoch == 1 or epoch % args.snapshot == 0:
        model_fn = '%s/%s_epoch_%d.chainermodel' % (
            result_dir, args.prefix, epoch + args.epoch_offset)
        pickle.dump(model, open(model_fn, 'wb'), -1)
    draw_loss_curve(log_fn, '%s/log.jpg' % result_dir)
def one_epoch(args, model, optimizer, data, label, epoch, train):
    """Run one pass over `data` (training when `train` is True, else eval).

    Minibatches are produced asynchronously by an `augmentation` worker
    process and consumed from a queue until a None sentinel arrives.
    Side effects: logs progress, dumps the computational graph on the very
    first training batch, and saves HDF5 snapshots on the snapshot
    schedule.  Returns None.
    """
    model.train = train
    # Array module: CuPy on GPU, NumPy on CPU.
    xp = cuda.cupy if args.gpu >= 0 else np
    # Fix: the parallel-augmentation setup had been commented out, yet
    # aug_queue.get() and aug_worker.join() below still reference these
    # names, so the function raised NameError on first call.  Restored to
    # match the working variant of this function elsewhere in the file.
    aug_queue = Queue()
    aug_worker = Process(target=augmentation,
                         args=(args, aug_queue, data, label, train))
    aug_worker.start()
    logging.info('data loading started')
    sum_accuracy = 0
    sum_loss = 0
    num = 0  # samples consumed so far
    while True:
        datum = aug_queue.get()
        if datum is None:
            # sentinel from the augmentation worker: epoch finished
            break
        x, t = datum
        # old-Chainer volatile flag: build a backprop graph only in training
        volatile = 'off' if train else 'on'
        x = Variable(xp.asarray(x), volatile=volatile)
        t = Variable(xp.asarray(t), volatile=volatile)
        if train:
            # optimizer.update runs forward+backward via the loss function
            optimizer.update(model, x, t)
            if epoch == 1 and num == 0:
                # dump the computational graph once, for debugging
                with open('{}/graph.dot'.format(args.result_dir), 'w') as o:
                    g = computational_graph.build_computational_graph(
                        (model.loss, ), remove_split=True)
                    o.write(g.dump())
            sum_loss += float(model.loss.data) * len(t.data)
            sum_accuracy += float(model.accuracy.data) * len(t.data)
            num += t.data.shape[0]
            logging.info('{:05d}/{:05d}\tloss:{:.3f}\tacc:{:.3f}'.format(
                num, data.shape[0], sum_loss / num, sum_accuracy / num))
        else:
            # eval: average predictions over the batch, score the argmax
            pred = model(x, t).data
            pred = pred.mean(axis=0)
            acc = int(pred.argmax() == t.data[0])
            sum_accuracy += acc
            num += 1
            logging.info('{:05d}/{:05d}\tacc:{:.3f}'.format(
                num, data.shape[0], sum_accuracy / num))
        del x, t
    if train and (epoch == 1 or epoch % args.snapshot == 0):
        model_fn = '{}/epoch-{}.model'.format(args.result_dir, epoch)
        opt_fn = '{}/epoch-{}.state'.format(args.result_dir, epoch)
        serializers.save_hdf5(model_fn, model)
        serializers.save_hdf5(opt_fn, optimizer)
    if train:
        logging.info('epoch:{}\ttrain loss:{}\ttrain accuracy:{}'.format(
            epoch, sum_loss / data.shape[0], sum_accuracy / data.shape[0]))
    else:
        logging.info('epoch:{}\ttest loss:{}\ttest accuracy:{}'.format(
            epoch, sum_loss / data.shape[0], sum_accuracy / data.shape[0]))
        draw_loss.draw_loss_curve('{}/log.txt'.format(args.result_dir),
                                  '{}/log.png'.format(args.result_dir),
                                  epoch)
    aug_worker.join()
    logging.info('data loading finished')
# NOTE(review): tail of a training driver — st, epoch, sum_loss, N, and the
# t*/v* queue and worker handles come from the enclosing scope.  Indentation
# reconstructed from a whitespace-collapsed source.
msg = get_log_msg('training', epoch, sum_loss, N, args, st)
logging.info(msg)
print('\n%s' % msg)
# eval
st = time.time()
# `eval` here is a project-defined evaluation function that shadows the
# builtin eval(); vinput_q/vdata_q are the validation data-loader queues.
sum_loss = eval(test_dl, N_test, model, trans, args, vinput_q, vdata_q)
msg = get_log_msg('test', epoch, sum_loss, N_test, args, st)
logging.info(msg)
print('\n%s' % msg)
# snapshot the model on the configured schedule (and always after epoch 1)
if epoch == 1 or epoch % args.snapshot == 0:
    model_fn = '{}/{}_epoch_{}.chainermodel'.format(
        result_dir, args.prefix, epoch + args.epoch_offset)
    pickle.dump(model, open(model_fn, 'wb'), -1)
draw_loss_curve(log_fn, '{}/log.jpg'.format(result_dir))
# quit training data loading thread
tinput_q.put(None)
tdata_loader.join()
# quit data loading thread
vinput_q.put(None)
vdata_loader.join()
# final snapshot
model_fn = '%s/%s_epoch_%d.chainermodel' % (result_dir, args.prefix,
                                            epoch + args.epoch_offset)
pickle.dump(model, open(model_fn, 'wb'), -1)
# NOTE(review): input_q / data_loader are not defined in this fragment (the
# queues here are tinput_q / vinput_q) — this tail would raise NameError if
# reached; looks like stale copy/paste.  Confirm against the full file.
input_q.put(None)
data_loader.join()
# NOTE(review): tail of a training driver (near-duplicate of the previous
# fragment) — st, epoch, sum_loss, N, and the t*/v* queue and worker handles
# come from the enclosing scope.  Indentation reconstructed from a
# whitespace-collapsed source.
msg = get_log_msg('training', epoch, sum_loss, N, args, st)
logging.info(msg)
print('\n%s' % msg)
# eval
st = time.time()
# `eval` here is a project-defined evaluation function that shadows the
# builtin eval(); vinput_q/vdata_q are the validation data-loader queues.
sum_loss = eval(test_dl, N_test, model, trans, args, vinput_q, vdata_q)
msg = get_log_msg('test', epoch, sum_loss, N_test, args, st)
logging.info(msg)
print('\n%s' % msg)
# snapshot the model on the configured schedule (and always after epoch 1)
if epoch == 1 or epoch % args.snapshot == 0:
    model_fn = '{}/{}_epoch_{}.chainermodel'.format(
        result_dir, args.prefix, epoch + args.epoch_offset)
    pickle.dump(model, open(model_fn, 'wb'), -1)
draw_loss_curve(log_fn, '{}/log.jpg'.format(result_dir))
# quit training data loading thread
tinput_q.put(None)
tdata_loader.join()
# quit data loading thread
vinput_q.put(None)
vdata_loader.join()
# final snapshot
model_fn = '%s/%s_epoch_%d.chainermodel' % (
    result_dir, args.prefix, epoch + args.epoch_offset)
pickle.dump(model, open(model_fn, 'wb'), -1)
# NOTE(review): input_q / data_loader are not defined in this fragment (the
# queues here are tinput_q / vinput_q) — this tail would raise NameError if
# reached; looks like stale copy/paste.  Confirm against the full file.
input_q.put(None)
data_loader.join()