def main():
    df_train = pd.read_csv(DATA_ROOT / 'train_masks.csv')
    ids_train = df_train['img'].map(lambda s: s.split('.')[0])

    ids_train_split, ids_valid_split = train_test_split(
        ids_train, test_size=0.2, random_state=42)

    print('Training on {} samples'.format(len(ids_train_split)))
    print('Validating on {} samples'.format(len(ids_valid_split)))

    train_dataset = CarvanaTrainDataset(ids_train_split.values)
    valid_dataset = CarvanaTrainDataset(ids_valid_split.values)
    train_loader = DataLoader(train_dataset, shuffle=True, batch_size=TRAIN_BATCH_SIZE)
    valid_loader = DataLoader(valid_dataset, batch_size=TRAIN_BATCH_SIZE)

    model = UNet()
    model.cuda()

    if LOAD_MODEL:
        load_best_model(model)
        model.cuda()

    criterion = Loss()
    optimizer = optim.RMSprop(model.parameters(), lr=0.0001)

    train_util.train(model, criterion, optimizer, 100, train_loader, valid_loader)
def train(args):
    ctx = Context(s3_path=args.context,
                  cache_dir=args.cache_dir,
                  workload_id=args.workload_id)
    package.install_packages(ctx.python_packages, ctx.bucket)

    model = ctx.models_id_map[args.model]

    logger.info("Training")

    with util.Tempdir(ctx.cache_dir) as temp_dir:
        model_dir = os.path.join(temp_dir, "model_dir")
        ctx.upload_resource_status_start(model)

        try:
            model_impl = ctx.get_model_impl(model["name"])
            train_util.train(model["name"], model_impl, ctx, model_dir)
            ctx.upload_resource_status_success(model)

            logger.info("Caching")
            logger.info("Caching model " + model["name"])
            model_export_dir = os.path.join(model_dir, "export", "estimator")
            model_zip_path = os.path.join(temp_dir, "model.zip")
            util.zip_dir(model_export_dir, model_zip_path)

            aws.upload_file_to_s3(local_path=model_zip_path,
                                  key=model["key"],
                                  bucket=ctx.bucket)

            util.log_job_finished(ctx.workload_id)
        except CortexException as e:
            ctx.upload_resource_status_failed(model)
            e.wrap("error")
            logger.error(str(e))
            logger.exception(
                "An error occurred, see `cx logs model {}` for more details.".format(
                    model["name"]))
            sys.exit(1)
        except Exception:
            ctx.upload_resource_status_failed(model)
            logger.exception(
                "An error occurred, see `cx logs model {}` for more details.".format(
                    model["name"]))
            sys.exit(1)
def run(config):
    """Entry point to run training."""
    init_data_normalizer(config)

    stage_ids = train_util.get_stage_ids(**config)
    if not config['train_progressive']:
        stage_ids = list(stage_ids)[-1:]

    # Train one stage at a time.
    for stage_id in stage_ids:
        batch_size = train_util.get_batch_size(stage_id, **config)
        tf.reset_default_graph()
        with tf.device(tf.train.replica_device_setter(config['ps_tasks'])):
            model = lib_model.Model(stage_id, batch_size, config)
            model.add_summaries()
            print('Variables:')
            for v in tf.global_variables():
                print('\t', v.name, v.get_shape().as_list())
            logging.info('Calling train.train')
            train_util.train(model, **config)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--token_data", default=None, type=str, required=True,
                        help="Directory containing train, test, evl and vocab.json")
    parser.add_argument("--feature_dir_prefix", default="features",
                        help="Prefix of the directories storing train/test/evl examples converted to features")
    parser.add_argument("--do_train", action='store_true',
                        help="Whether to run training")
    parser.add_argument("--do_decode", action='store_true',
                        help="Whether to run decoding on the test set")
    parser.add_argument("--example_num", default=1024 * 8, type=int,
                        help="Number of examples stored in each feature file")
    parser.add_argument("--article_max_len", default=400, type=int,
                        help="Maximum allowed article length")
    parser.add_argument("--abstract_max_len", default=100, type=int,
                        help="Maximum allowed abstract length")
    parser.add_argument("--vocab_num", default=50000, type=int,
                        help="Maximum allowed vocabulary size")
    parser.add_argument("--pointer_gen", action='store_true',
                        help="Whether to use the pointer mechanism")
    parser.add_argument("--use_coverage", action="store_true",
                        help="Whether to use the coverage mechanism")
    parser.add_argument("--no_cuda", action='store_true',
                        help="Do not use the GPU even when one is available")
    parser.add_argument("--epoch_num", default=10, type=int, help="Number of training epochs")
    parser.add_argument("--train_batch_size", default=16, type=int, help="Train batch size")
    parser.add_argument("--eval_batch_size", default=64, type=int, help="Evaluation batch size")
    parser.add_argument("--hidden_dim", default=256, type=int, help="Hidden dimension")
    parser.add_argument("--embedding_dim", default=128, type=int, help="Embedding dimension")
    parser.add_argument("--coverage_loss_weight", default=1.0, type=float,
                        help="Coverage loss weight")
    parser.add_argument("--eps", default=1e-12, type=float,
                        help="Epsilon used in log(v + eps) to avoid v == 0")
    parser.add_argument("--dropout", default=0.5, type=float, help="Dropout rate")
    parser.add_argument("--lr", default=1e-3, type=float, help="Learning rate")
    parser.add_argument("--max_grad_norm", default=1.0, type=float, help="Max gradient norm")
    parser.add_argument("--adagrad_init_acc", default=0.1, type=float,
                        help="Adagrad initial accumulator value")
    parser.add_argument("--adam_epsilon", default=1e-8, type=float,
                        help="Epsilon for Adam optimizer")
    parser.add_argument("--gradient_accumulation_steps", default=1, type=int,
                        help="Number of update steps to accumulate before performing a backward/update pass")
    parser.add_argument("--output_dir", default="output", type=str,
                        help="Folder to store models and results")
    parser.add_argument("--evaluation_steps", default=500, type=int,
                        help="Evaluate every N training steps")
    parser.add_argument("--seed", default=4321, type=int, help="Random seed")
    args = parser.parse_args()

    args.device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    set_seed(args.seed)

    vocab_file = os.path.join(args.token_data, 'vocab.json')
    assert os.path.exists(vocab_file)
    vocab = Vocab(vocab_file=vocab_file, vob_num=args.vocab_num)

    check(args, vocab=vocab)

    model = PointerGeneratorNetworks(vob_size=args.vocab_num,
                                     embed_dim=args.embedding_dim,
                                     hidden_dim=args.hidden_dim,
                                     pad_idx=vocab.pad_idx,
                                     dropout=args.dropout,
                                     pointer_gen=args.pointer_gen,
                                     use_coverage=args.use_coverage)
    model = model.to(args.device)

    if args.do_train:
        optimizer = Adam(model.parameters(), lr=args.lr)
        train(args=args, model=model, optimizer=optimizer, with_eval=True)

    if args.do_decode:
        decoder(args, model, vocab=vocab)
    idx: class_ for class_, idx in model.class_to_idx.items()
}

for p in optimizer.param_groups[0]['params']:
    if p.requires_grad:
        print(p.shape)  # Print the parameter groups to be optimized

cuda.empty_cache()  # Clear the GPU cache

model, history = train_util.train(
    model,                              # Model to train
    criterion,                          # Loss function to use
    optimizer,                          # Optimizer to use
    dataloaders['train'],               # Training dataset
    dataloaders['val'],                 # Validation dataset
    save_file_name=save_file_name,      # File name to save under
    max_epochs_stop=10,                 # Stop if validation loss has not decreased for this many epochs
    n_epochs=training_epoch,            # Maximum number of epochs to train
    print_every=1,                      # Print progress every N epochs
    early_stop=train_util.Early_stop)   # Whether to apply early stopping

# Save the loss/accuracy curves
train_util.save_train_valid_loss(history, model_choice)

# Save the model checkpoint
train_util.save_checkpoint(model, path=checkpoint_path, model_name=model_choice)

# Function that picks one image at random
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--token_data", default=None, required=True, type=str,
                        help="Directory containing train, test, dev and vocab.json")
    parser.add_argument("--feature_dir_prefix", default="features",
                        help="Prefix of the directories storing train/test/evl examples converted to features")
    parser.add_argument("--do_train", action='store_true',
                        help="Whether to run training")
    parser.add_argument("--do_decode", action='store_true',
                        help="Whether to run decoding on the test set")
    parser.add_argument("--example_num", default=1024 * 8, type=int,
                        help="Number of examples stored in each feature file")
    parser.add_argument("--no_cuda", action='store_true',
                        help="Do not use the GPU even when one is available")
    parser.add_argument("--epoch_num", default=15, type=int, help="Number of training epochs")
    parser.add_argument("--train_batch_size", default=16, type=int, help="Train batch size")
    parser.add_argument("--gradient_accumulation_steps", default=4, type=int,
                        help="Number of update steps to accumulate before performing a backward/update pass")
    parser.add_argument("--eval_batch_size", default=128, type=int, help="Evaluation batch size")
    parser.add_argument("--lr", default=1e-3, type=float, help="Learning rate")
    parser.add_argument("--max_grad_norm", default=1.0, type=float, help="Max gradient norm")
    parser.add_argument("--adagrad_init_acc", default=0.1, type=float,
                        help="Adagrad initial accumulator value")
    parser.add_argument("--adam_epsilon", default=1e-8, type=float,
                        help="Epsilon for Adam optimizer")
    parser.add_argument("--output_dir", default="output", type=str,
                        help="Folder to store models and results")
    parser.add_argument("--evaluation_steps", default=500, type=int,
                        help="Evaluate every N training steps")
    parser.add_argument("--seed", default=4321, type=int, help="Random seed")
    args = parser.parse_args()

    args.device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    set_seed(args.seed)

    vocab_file = os.path.join(args.token_data, 'vocab.json')
    assert os.path.exists(vocab_file)

    model_config_file = os.path.join(".", "model", "model_config.json")
    assert os.path.exists(model_config_file)
    with open(model_config_file, "r", encoding="utf-8") as f:
        model_config_dict = json.load(f)
    model_config = ModelConfig(**model_config_dict)

    vocab = Vocab(vocab_file=vocab_file, vob_num=model_config.vocab_size)
    model_config.pad_idx = vocab.pad_idx
    model_config.unk_idx = vocab.unk_idx
    model_config.start_idx = vocab.start_idx
    model_config.stop_idx = vocab.stop_idx

    check(args, model_config, vocab)

    model = PointerGeneratorNetworks(config=model_config)
    model = model.to(args.device)

    if args.do_train:
        optimizer = Adam(model.parameters(), lr=args.lr)
        train(args=args, model_config=model_config, model=model,
              optimizer=optimizer, with_eval=True)

    if args.do_decode:
        decoder(args, model_config=model_config, model=model, vocab=vocab)
def kqn_main():
    # Argument parser.
    # Default hyperparameters are not necessarily optimal.
    # Dropout is not used in this implementation.
    parser = ArgumentParser()
    parser.add_argument('--dataset', type=str, default='assist0910',
                        help='choose from assist0910, assist15, statics11, and synthetic-5')
    parser.add_argument('--version', type=int, default=None,
                        help='if dataset==synthetic-5, choose from 0 to 19')
    parser.add_argument('--min_seq_len', type=int, default=2,
                        help='minimum number of time steps below which student problem-solving records are discarded')
    parser.add_argument('--rnn', type=str, default='lstm',
                        help='rnn type. one of lstm and gru.')
    parser.add_argument('--hidden', type=int, default=128,
                        help='dimensionality of skill and knowledge state vectors')
    parser.add_argument('--rnn_hidden', type=int, default=128,
                        help='number of hidden units for the knowledge state encoder rnn')
    parser.add_argument('--mlp_hidden', type=int, default=128,
                        help='number of hidden units for the skill encoder mlp')
    parser.add_argument('--layer', type=int, default=1, help='number of rnn layers')
    parser.add_argument('--gpu', type=int, default=-1,
                        help='which gpu to use. defaults to -1: not using any')
    parser.add_argument('--lr', type=float, default=0.001, help='learning rate for adam')
    parser.add_argument('--batch', type=int, default=100, help='batch size')
    parser.add_argument('--ckpt', type=str, default='./ckpt', help='default checkpoint path')
    parser.add_argument('--epoch', type=int, default=100, help='number of epochs')
    parser.add_argument('--optim', type=str, default='adam',
                        help='optimizer to use. currently only adam is implemented.')
    args = parser.parse_args()

    dataset = args.dataset
    version = args.version
    min_seq_len = args.min_seq_len
    rnn_type = args.rnn
    n_hidden = args.hidden
    n_rnn_hidden = args.rnn_hidden
    n_mlp_hidden = args.mlp_hidden
    n_rnn_layers = args.layer
    gpu = args.gpu
    lr = args.lr
    batch_size = args.batch
    ckpt_path = args.ckpt
    n_epochs = args.epoch
    opt_str = args.optim

    if ckpt_path is not None:
        if not os.path.exists(ckpt_path):
            os.makedirs(ckpt_path)

    # Fall back to CPU when no GPU is requested or CUDA is unavailable.
    if gpu == -1 or not torch.cuda.is_available():
        DEVICE = 'cpu'
    else:
        DEVICE = gpu

    # Load data.
    n_skills = get_num_skills(dataset)
    fnames = {
        'train': get_csv_fname(True, dataset, version),
        'eval': get_csv_fname(False, dataset, version)
    }
    datasets = {
        'train': read_csv(fnames['train'], min_seq_len),
        'eval': read_csv(fnames['eval'])
    }
    datasets = {
        'train': KQNDataset(datasets['train'][0], datasets['train'][1],
                            datasets['train'][2], n_skills),
        'eval': KQNDataset(datasets['eval'][0], datasets['eval'][1],
                           datasets['eval'][2], n_skills)
    }
    dataloaders = {
        'train': DataLoader(datasets['train'], batch_size=batch_size,
                            drop_last=False, collate_fn=PadSequence(), shuffle=True),
        'eval': DataLoader(datasets['eval'], batch_size=batch_size,
                           drop_last=False, collate_fn=PadSequence())
    }

    model = KQN(n_skills, n_hidden, n_rnn_hidden, n_mlp_hidden,
                n_rnn_layers, rnn_type, DEVICE).to(DEVICE)
    if opt_str == 'adam':
        opt_class = Adam
    optimizer = opt_class(model.parameters(), lr=lr)

    writer = SummaryWriter('./logs')

    train(model, dataloaders, optimizer, writer, n_epochs, ckpt_path, DEVICE)
with tf.Session(config=config, graph=net.graph) as sess:
    sess.run(tf.global_variables_initializer(), {net.is_training: True})

    if FLAGS.in_model_dirs:
        exclude = ''
        if 'embedding' in FLAGS.loss_func:
            exclude = 'Yp'
        elif 'position' in FLAGS.loss_func:
            exclude = 'Yc'
        for in_model_dir in FLAGS.in_model_dirs.split(','):
            assert load_model(sess, in_model_dir, exclude)

    if FLAGS.train:
        train(sess, net, train_data, test_data,
              n_epochs=FLAGS.n_epochs,
              snapshot_epoch=FLAGS.snapshot_epoch,
              model_dir=FLAGS.out_model_dir,
              log_dir=FLAGS.log_dir,
              data_name=g_shape_synset,
              output_generator=None)
    else:
        '''
        train_loss, train_accuracy, _ = evaluate(sess, net, train_data)
        test_loss, test_accuracy, _ = evaluate(sess, net, test_data)
        msg = "|| Train Loss: {:6f}".format(train_loss)
        msg += " | Train Accu: {:5f}".format(train_accuracy)
        msg += " | Test Loss: {:6f}".format(test_loss)
        msg += " | Test Accu: {:5f}".format(test_accuracy)
        msg += " ||"
        print(msg)
        '''

    if 'joint_embedding' in FLAGS.loss_func or\
def run(args, train_data, val_data, test_data):
    tf.set_random_seed(1234)
    np.random.seed(1234)
    random.seed(1234)

    print('\n==== PARAMS ====')
    for arg in vars(args):
        print('{}={}'.format(arg, getattr(args, arg)))
    print('========\n')

    if args.exp_type == 'ours':
        net = Network(train_data.n_points, train_data.n_dim,
                      test_data.n_seg_ids, args.K, args.batch_size,
                      args.init_learning_rate, args.decay_step, args.decay_rate,
                      args.bn_decay_step, args.l21_norm_weight, args.net_options)
    elif args.exp_type == 'sem_seg':
        print("## Semantic Segmentation ##")
        net = NetworkSemSeg(train_data.n_points, train_data.n_dim,
                            train_data.n_labels, args.batch_size,
                            args.init_learning_rate, args.decay_step,
                            args.decay_rate, args.bn_decay_step, args.net_options)
    else:
        assert False

    config = tf.ConfigProto()
    config.allow_soft_placement = True
    config.gpu_options.allow_growth = True

    with tf.Session(config=config, graph=net.graph) as sess:
        sess.run(tf.global_variables_initializer(), {net.is_training: True})

        if args.in_model_dirs:
            include = ''
            for in_model_dir in args.in_model_dirs.split(','):
                assert load_model(sess, in_model_dir, include)

        if args.train:
            train(sess, net, args.exp_type, train_data, val_data,
                  n_epochs=args.n_epochs,
                  snapshot_epoch=args.snapshot_epoch,
                  validation_epoch=args.validation_epoch,
                  model_dir=args.out_model_dir,
                  log_dir=args.log_dir,
                  data_name=train_data.name,
                  output_generator=None)

        train_loss, _ = validate(sess, net, args.exp_type, train_data)
        test_loss, _ = validate(sess, net, args.exp_type, test_data)

        msg = "|| Train Loss: {:6f}".format(train_loss)
        msg += " | Test Loss: {:6f}".format(test_loss)
        msg += " ||"
        print(msg)

        if args.train:
            # Save training result.
            if not os.path.exists(args.out_dir):
                os.makedirs(args.out_dir)
            out_file = os.path.join(
                args.out_dir,
                '{}.txt'.format(datetime.now().strftime("%Y-%m-%d_%H-%M-%S")))
            with open(out_file, 'w') as f:
                f.write(msg + '\n')
            print("Saved '{}'.".format(out_file))

        if args.exp_type == 'ours':
            if 'eval' in args.eval_type:
                evaluate.evaluate(sess, net, test_data, args.out_dir)
            if 'eval_keypoints' in args.eval_type:
                evaluate_keypoints.evaluate(sess, net, test_data, args.out_dir)
            if 'eval_obj_det' in args.eval_type:
                evaluate_obj_det.evaluate(sess, net, test_data, args.out_dir)
            if 'save_dict' in args.eval_type:
                P = test_data.point_clouds
                A = predict_A(P, sess, net)
                out_file = os.path.join(args.out_dir, 'dictionary.npy')
                np.save(out_file, A)
                print("Saved '{}'".format(out_file))
        elif args.exp_type == 'sem_seg':
            evaluate_sem_seg.evaluate(sess, net, test_data, args.out_dir)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# --------------------------------------- #
# --- Full precision model load/train --- #
# --------------------------------------- #
if args.model == "vgg16":
    net = VGG16()
elif args.model == "resnet50":
    net = ResNet50()
else:
    print("Model {} not supported!".format(args.model))
    sys.exit(0)
net = net.to(device)

# Uncomment to load pretrained weights
# net.load_state_dict(torch.load("net_before_pruning.pt"))

# Comment out if you have loaded pretrained weights.
# Tune the hyperparameters here.
if not args.skip_pt:
    train(net, epochs=args.epochs, batch_size=args.batch, lr=args.lr,
          reg=args.reg, checkpoint_path=args.ckpt_dir)
else:
    net.load_state_dict(torch.load(args.path))
    print("Net loaded from {}".format(args.path))

test(net)
summary(net)
import model
import train_util
import data_util  # required for data_util.load_data below
import torch
import torch.nn as nn
import torch.optim as optim

train_path = ''
val_path = ''

dset_loaders, dset_sizes, dset_classes = data_util.load_data(
    train_path=train_path, val_path=val_path)
print(dset_sizes)
print(dset_classes)

net = model.AlexNet().cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), weight_decay=0.0005)
lr_scheduler = train_util.exp_lr_scheduler
lr = 0.001

best_model, best_acc = train_util.train(net, criterion, optimizer, lr_scheduler,
                                        dset_loaders, dset_sizes, lr, 40)

print('Saving the best model')
filename = 'trained_model_val_{:.2f}.pt'.format(best_acc)
torch.save(best_model.state_dict(), filename)