def main():
    """Train a SeqGAN-style model on a token stream, validating generated text
    against the set of 3-grams observed in the training data."""
    random.seed(SEED)
    np.random.seed(SEED)
    token_stream = get_data()
    # '_START' must map to index 0 so generated sequences can begin with it.
    assert START_TOKEN == 0
    words = ['_START'] + list(set(token_stream))
    word2idx = dict((word, i) for i, word in enumerate(words))
    num_words = len(words)
    # Record every observed 3-gram (as an index tuple) for sequence verification.
    # NOTE(review): the range stops at len-3, so the final 3-gram of the stream
    # is never recorded — confirm this is intended.
    three_grams = dict((tuple(word2idx[w] for w in token_stream[i:i + 3]), True)
                       for i in range(len(token_stream) - 3))
    print('num words', num_words)
    print('stream length', len(token_stream))
    print('distinct 3-grams', len(three_grams))
    trainable_model = get_trainable_model(num_words)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    print('training')
    for epoch in range(TRAIN_ITER // EPOCH_ITER):
        print('epoch', epoch)
        # Curriculum: start fully supervised, decay linearly toward adversarial.
        proportion_supervised = max(0.0, 1.0 - CURRICULUM_RATE * epoch)
        train.train_epoch(
            sess, trainable_model, EPOCH_ITER,
            proportion_supervised=proportion_supervised,
            g_steps=1, d_steps=D_STEPS,
            next_sequence=lambda: get_random_sequence(token_stream, word2idx),
            verify_sequence=lambda seq: verify_sequence(three_grams, seq),
            words=words)
def trainer(model, train_data, test_data, epochs, learning_rate):
    """Train `model` with SGD for `epochs` epochs, printing train/test loss and
    accuracy after every epoch."""
    criterion = nn.CrossEntropyLoss()
    sgd = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'

    def _fmt(value):
        # Fixed-point, two-decimal rendering of a scalar metric.
        return np.array2string(value, precision=2, floatmode='fixed')

    for epoch_idx in range(epochs):
        # One optimization pass over the training set.
        train_epoch(train_data, model, criterion, sgd)
        # Metrics on both splits (no gradient updates).
        loss_tr, acc_tr, loss_te, acc_te = validate_epoch(
            train_data, test_data, model, criterion)
        print(template.format(
            epoch_idx,
            _fmt(loss_tr),
            _fmt(acc_tr * 100),
            _fmt(loss_te),
            _fmt(acc_te * 100)))
    print('Finished Training')
def main():
    """Train a SeqGAN-style model on a token stream (same driver as the sibling
    `main` above), validating generated text against observed 3-grams."""
    random.seed(SEED)
    np.random.seed(SEED)
    token_stream = get_data()
    # '_START' must map to index 0 so generated sequences can begin with it.
    assert START_TOKEN == 0
    words = ['_START'] + list(set(token_stream))
    word2idx = dict((word, i) for i, word in enumerate(words))
    num_words = len(words)
    # Record every observed 3-gram (as an index tuple) for sequence verification.
    three_grams = dict((tuple(word2idx[w] for w in token_stream[i:i + 3]), True)
                       for i in range(len(token_stream) - 3))
    print('num words', num_words)
    print('stream length', len(token_stream))
    print('distinct 3-grams', len(three_grams))
    trainable_model = get_trainable_model(num_words)
    sess = tf.Session()
    # BUG FIX: was `sess.run(tf.global_variables_initializer)` — passing the
    # function object instead of the init op, so variables were never
    # initialized. The call parentheses were missing.
    sess.run(tf.global_variables_initializer())
    print('training')
    for epoch in range(TRAIN_ITER // EPOCH_ITER):
        print('epoch', epoch)
        # Curriculum: start fully supervised, decay linearly toward adversarial.
        proportion_supervised = max(0.0, 1.0 - CURRICULUM_RATE * epoch)
        train.train_epoch(
            sess, trainable_model, EPOCH_ITER,
            proportion_supervised=proportion_supervised,
            g_steps=1, d_steps=D_STEPS,
            next_sequence=lambda: get_random_sequence(token_stream, word2idx),
            verify_sequence=lambda seq: verify_sequence(three_grams, seq),
            words=words)
def retrain_with_pseudo_label(loaded_models, train_ids, valid_ids, TRAIN_IMAGE_DIR, DATAFRAME, config):
    """Retrain a deep copy of each loaded model on the labelled training rows
    concatenated with that model's own pseudo-labelled rows; store the result
    under loaded_models[key]['pseudo_model'].

    NOTE(review): `valid_ids` is accepted but never used here — confirm whether
    validation was meant to happen during retraining.
    """
    # No pseudo labels were generated — nothing to do. Only the first model is
    # checked; presumably all models are in the same state — confirm.
    if 'pseudo_dataframe' not in loaded_models[list(loaded_models.keys())[0]]:
        return

    def worker_init_fn(worker_id):
        # Deterministic per-worker seeding (random_seed is a module-level global).
        random.seed(worker_id+random_seed)
        np.random.seed(worker_id+random_seed)

    for key in loaded_models.keys():
        # make dataloader with pseudo label
        model_config = loaded_models[key]['config']
        dataframe_with_pseudo = pd.concat(
            [DATAFRAME.loc[DATAFRAME['image_id'].isin(train_ids), :],
             loaded_models[key]['pseudo_dataframe']], axis=0)
        retrain_dataset = GWDDataset(dataframe_with_pseudo, TRAIN_IMAGE_DIR, model_config,
                                     is_train=True, do_transform=False)  # dataset for retrain
        retrain_data_loader = DataLoader(retrain_dataset, batch_size=1, shuffle=True,
                                         num_workers=0, worker_init_fn=worker_init_fn,
                                         collate_fn=collate_fn)
        # Work on a deep copy so the original model stays untouched.
        model = copy.deepcopy(loaded_models[key]['model'])
        model.train()
        trainable_params = [p for p in model.parameters() if p.requires_grad]
        optimizer = get_optimizer(model_config['train']['optimizer'], trainable_params)
        # retraining
        print("Retraining %s" % key)
        for epoch in range(0, config['epochs']):
            if model_config['general']['kfold'] < 0:
                print("\r[Epoch %d]" % epoch)
            train_epoch(model, retrain_data_loader, None, optimizer)
        model.eval()
        loaded_models[key]['pseudo_model'] = model
    return
def main():
    """Train a fully-connected VAE on MNIST, logging to TensorBoard and saving
    periodic and final state dicts under a parameter-stamped log directory."""
    train_dataset = datasets.MNIST(
        root='C:/Users/user/Documents/InterestingAttempt/VAE/mnist_data/',
        train=True,
        transform=transforms.Compose([transforms.ToTensor()]))
    test_dataset = datasets.MNIST(
        root='C:/Users/user/Documents/InterestingAttempt/VAE/mnist_data/',
        train=False,
        transform=transforms.Compose([transforms.ToTensor()]))
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=100, shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=1000, shuffle=False)
    # Reconstruction loss + KL regularizer (KL weighted by `lamda` below).
    criterion = nn.MSELoss()
    # criterion = nn.BCELoss()
    criterion2 = KLLoss()
    epoch_num = 30
    lr = 1e-3
    weight_decay = 1e-5
    lamda = 0.01
    latent_num = 2
    mid_features = 256
    outf = r'C:\Users\user\Documents\InterestingAttempt\VAE\logs\linear_{}_{}_{}_{}_{}'.format(
        latent_num, lr, lamda, weight_decay, epoch_num)
    if not os.path.exists(outf):
        os.makedirs(outf)
    model = VAE(28 * 28, mid_features, latent_num).cuda()
    # NOTE(review): no lr is passed here (Adam's default applies), but the
    # per-epoch loop below overwrites param_group['lr'] anyway.
    optimizer = optim.Adam(
        model.parameters(), weight_decay=weight_decay, betas=(0.9, 0.999))
    writer = SummaryWriter(outf)
    for epoch in range(epoch_num):
        # Halve lr every 40 epochs; with epoch_num=30 this never actually
        # changes the rate — presumably leftover from a longer schedule.
        current_lr = lr / 2**int(epoch / 40)
        for param_group in optimizer.param_groups:
            param_group['lr'] = current_lr
        train_epoch(
            model, optimizer, train_loader, criterion, epoch,
            writer=writer, criterion2=criterion2, lamda=lamda)
        test(
            model, test_loader, criterion, epoch,
            writer=writer, criterion2=criterion2)
        # Snapshot every 10 epochs.
        if (epoch + 1) % 10 == 0:
            torch.save(model.state_dict(),
                       os.path.join(outf, 'model_{}.pth'.format(epoch)))
    writer.close()
    torch.save(model.state_dict(), os.path.join(outf, 'model.pth'))
def run():
    """Parse options, build dataset/model/criterion, then train and evaluate
    for N_EP epochs on a fixed GPU."""
    args = parse_opts()
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"] = "9"
    # GLOBAL VARS #
    MODE = args.mode
    CLASS_WEIGHT = False
    N_EP = 20
    FLATTEN = args.flatten
    RNN = args.rnn
    BATCH_SIZE = args.batch_size
    ####
    datasets, dataloaders = init_dataset(
        BATCH_SIZE, single_channel=args.single_channel)
    print('[Train] class counts', np.unique(
        datasets['train'].target_vals, return_counts=True))
    print('[Test] class counts', np.unique(
        datasets['test'].target_vals, return_counts=True))
    n_ch = 1 if args.single_channel else 3
    # NOTE(review): `in_channels` is only bound when MODE is 'min' or 'max';
    # any other mode raises NameError at init_net below — confirm args.mode is
    # restricted upstream.
    if MODE == 'min':
        in_channels = datasets['train'].min_depth*n_ch
    elif MODE == 'max':
        in_channels = datasets['train'].max_depth*n_ch
    torch.manual_seed(0)
    # init net
    net = init_net(opt=args.model_idx, in_channels=in_channels)
    class_weight = None
    if CLASS_WEIGHT:
        # Inverse-frequency class weights (dead code while CLASS_WEIGHT=False).
        cnts = Counter(datasets['train'].target_vals)
        n = len(datasets['train'])
        class_weight = [max(cnts.values())/cnts['0'],
                        max(cnts.values())/cnts['1']]
        class_weight = torch.FloatTensor(class_weight)
    cross_entrp_loss = nn.CrossEntropyLoss(weight=class_weight).cuda()
    focal_loss = FocalLoss().cuda()  # built but not selected below
    optimizer = optim.Adam(net.parameters(), lr=0.000027)
    criterion = cross_entrp_loss
    # scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    #     optimizer, 'min', verbose=True, patience=7)
    for ep in range(N_EP):
        train_epoch(net, dataloaders['train'], optimizer, criterion, ep,
                    scheduler=None, flatten=FLATTEN, MODE=MODE, rnn=RNN)
        valid_loss = evaluate(net, dataloaders['test'], criterion, ep,
                              flatten=FLATTEN, MODE=MODE, rnn=RNN)
def main():
    """Build NvNet and its loss, load BraTS train/validation data, and train
    with a ReduceLROnPlateau schedule driven by the validation loss."""
    # init or load model
    print("init model with input shape", config["input_shape"])
    model = NvNet(config=config, input_shape=config["input_shape"],
                  seg_outChans=config["n_labels"])
    parameters = model.parameters()
    optimizer = optim.Adam(parameters, lr=config["initial_learning_rate"],
                           weight_decay=config["L2_norm"])
    start_epoch = 1
    if config["VAE_enable"]:
        # VAE branch: dice loss combined with weighted auxiliary terms.
        loss_function = CombinedLoss(k1=config["loss_k1_weight"], k2=config["loss_k2_weight"])
    else:
        loss_function = SoftDiceLoss()
    # data_generator
    print("data generating")
    training_data = BratsDataset(phase="train", config=config)
    train_loader = torch.utils.data.DataLoader(dataset=training_data,
                                               batch_size=config["batch_size"],
                                               shuffle=True, pin_memory=True)
    valildation_data = BratsDataset(phase="validate", config=config)  # (sic: "valildation")
    valildation_loader = torch.utils.data.DataLoader(dataset=valildation_data,
                                                     batch_size=config["batch_size"],
                                                     shuffle=True, pin_memory=True)
    train_logger = Logger(model_name=config["model_file"],
                          header=['epoch', 'loss', 'acc', 'lr'])
    if config["cuda_devices"] is not None:
        model = model.cuda()
        loss_function = loss_function.cuda()
    # if not config["overwrite"] and os.path.exists(config["model_file"]) or os.path.exists(config["saved_model_file"]):
    #     model, start_epoch, optimizer = load_old_model(model, optimizer, saved_model_path=config["saved_model_file"])
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min',
                                               factor=config["lr_decay"],
                                               patience=config["patience"])
    print("training on label:{}".format(config["labels"]))
    # NOTE(review): range(start_epoch, epochs) runs epochs-1 iterations when
    # start_epoch == 1; other drivers in this file use `n_epochs + 1` — confirm
    # whether the last epoch is intentionally skipped.
    for i in range(start_epoch, config["epochs"]):
        train_epoch(epoch=i, data_loader=train_loader, model=model,
                    model_name=config["model_file"], criterion=loss_function,
                    optimizer=optimizer, opt=config, epoch_logger=train_logger)
        val_loss = val_epoch(epoch=i, data_loader=valildation_loader, model=model,
                             criterion=loss_function, opt=config,
                             optimizer=optimizer, logger=train_logger)
        # Decay lr when validation loss plateaus.
        scheduler.step(val_loss)
def test_bert(self):
    """Smoke-test: the BERT utterance encoder + DAR RNN train on random inputs
    without raising."""
    encoder = model.BertUttEncoder(utt_dims)
    dar = model.DARRNN(utt_dims, n_labels, n_hidden, 1, dropout=0)
    # Optimize both modules jointly.
    params = itertools.chain(dar.parameters(), encoder.parameters())
    opt = optim.Adam(params)
    loss_fn = nn.CrossEntropyLoss(ignore_index=0)
    print("Testing BERT on random inputs.")
    for _ in range(epochs):
        train.train_epoch(encoder, dar, train_data, n_labels,
                          batch_size, bptt, None, loss_fn, opt, 'cpu')
def main():
    """Train a UNet_v2 deraining model with L1 + SSIM criteria, testing every
    5 epochs and snapshotting every 20, then emit final results."""
    print(outf)
    print("loading dataset ...")
    trainDataset = TrainDataset(name='/data0/niejiangtao/ICIP2019Deraining/train/train.h5')
    # Scale the batch size by the number of GPUs in use.
    batchSize = opt.batchSize_per_gpu * len(device_ids)
    trainLoader = udata.DataLoader(trainDataset, batch_size=batchSize,
                                   shuffle=True, num_workers=0)
    testDataset = TestDataset2(name='/data0/niejiangtao/ICIP2019Deraining/test_a/test.h5')
    print('testDataset len : {}'.format(len(testDataset)))
    l1_criterion = nn.L1Loss().cuda()
    # mask_criterion = nn.MSELoss().cuda()
    ssim_criterion = SSIM().cuda()
    model = UNet_v2(n_channels=3, n_classes=3)
    # model = RESCAN()
    if len(device_ids) > 1:
        model = nn.DataParallel(model, device_ids=device_ids)
    model.cuda()
    beta1 = 0.9
    beta2 = 0.999
    # Only optimize parameters that require gradients.
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                           lr=opt.lr, weight_decay=weight_decay, betas=(beta1, beta2))
    writer = SummaryWriter(outf)
    for epoch in range(opt.epochs):
        start = time.time()
        # Halve the learning rate every `interval` epochs (module-level global).
        current_lr = opt.lr / 2**int(epoch / interval)
        for param_group in optimizer.param_groups:
            param_group["lr"] = current_lr
        print("epoch {} learning rate {}".format(epoch, current_lr))
        # test(model, testDataset, None, epoch, writer=writer)
        train_epoch(model, optimizer, trainLoader, l1_criterion, None,
                    ssim_criterion, epoch, writer=writer, radio=radio)
        if (epoch+1) % 5 == 0:
            test(model, testDataset, None, epoch, writer=writer)
        if (epoch+1) % 20 == 0:
            """
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.module.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()
            }, os.path.join(outf, 'checkpoint_{}.pth'.format(epoch)))
            """
            torch.save(model.state_dict(), os.path.join(outf, 'model_{}.pth'.format(epoch)))
        end = time.time()
        print('epoch {} cost {} hour '.format(
            epoch, str((end - start) / (60 * 60))))
    torch.save(model.state_dict(), os.path.join(outf, 'model.pth'))
    generate_result(model, outf, testDataset, mat=False, ouput_img=True)
def train(rnn_trainer, rnn_predictor, train_data, valid_target_data,
          valid_source_data, dictionary, epoch_size, model_directory,
          beam_size, viterbi_size):
    """Train for `epoch_size` epochs, decoding and scoring the validation set
    after each one; track the epoch with the best f-score.

    Returns:
        (best_metrics, best_epoch): metrics tuple of and index of the epoch
        with the highest f-score (metrics[2]).
    """
    start_time = time.time()
    log_path = os.path.join(model_directory, 'log.txt')
    best_epoch = None
    best_metrics = None
    # FIX: the log file was opened without ever being closed; a context manager
    # guarantees it is flushed and closed even if an epoch raises.
    with open(log_path, 'w') as log_file:
        for epoch in range(epoch_size):
            # Train one epoch and save the model
            train_epoch(rnn_trainer, train_data, model_directory, epoch)
            # Decode all sentences with the freshly saved weights
            rnn_predictor.restore_from_directory(model_directory)
            system, decode_time = decode_all(rnn_predictor, valid_source_data,
                                             dictionary, beam_size, viterbi_size)
            # Evaluate results
            metrics = evaluate(system, valid_target_data)
            # Print metrics
            log_text = 'decoding precision: {:.2f} recall: {:.2f} f-score: {:.2f} accuracy: {:.2f}\n'.format(
                *metrics)
            log_text += 'decoding total time: {:.2f} average time: {:.2f}'.format(
                decode_time, decode_time / len(system))
            print(log_text)
            print(log_text, file=log_file)
            # Write decoded results to file
            decode_path = os.path.join(model_directory, 'decode-{}.txt'.format(epoch))
            with open(decode_path, 'w') as file:
                file.write('\n'.join(system))
            # BUG FIX: `if not best_epoch or ...` was also true whenever
            # best_epoch == 0 (falsy), so after the first iteration the "best"
            # epoch was overwritten unconditionally. Compare against None.
            if best_epoch is None or best_metrics[2] < metrics[2]:
                best_epoch = epoch
                best_metrics = metrics
    total_time = time.time() - start_time
    print('best epoch:', best_epoch)
    print(
        'best epoch metrics: precision: {:.2f} recall: {:.2f} f-score: {:.2f} accuracy: {:.2f}'
        .format(*best_metrics))
    print('total experiment time:', total_time)
    print()
    return best_metrics, best_epoch
def main_worker():
    """Train a CNN-encoder / RNN-decoder pair end to end, validating each epoch
    and checkpointing (plus tensorboard logging) every `save_interval` epochs."""
    opt = parse_opts()
    print(opt)
    seed = 1
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # CUDA for PyTorch
    device = torch.device(f"cuda:{opt.gpu}" if opt.use_cuda else "cpu")
    # tensorboard
    summary_writer = tensorboardX.SummaryWriter(log_dir='tf_logs')
    # defining model
    encoder_cnn, decoder_rnn = generate_model(opt, device)
    # get data loaders
    train_loader, val_loader = get_loaders(opt)
    # optimizer — one Adam over both modules' parameters
    crnn_params = list(encoder_cnn.parameters()) + \
        list(decoder_rnn.parameters())
    optimizer = torch.optim.Adam(crnn_params, lr=opt.lr_rate,
                                 weight_decay=opt.weight_decay)
    # scheduler = lr_scheduler.ReduceLROnPlateau(
    #     optimizer, 'min', patience=opt.lr_patience)
    criterion = nn.CrossEntropyLoss()
    # resume model
    if opt.resume_path:
        start_epoch = resume_model(opt, encoder_cnn, decoder_rnn, optimizer)
    else:
        start_epoch = 1
    # start training
    for epoch in range(start_epoch, opt.n_epochs + 1):
        train_loss, train_acc = train_epoch(
            encoder_cnn, decoder_rnn, train_loader, criterion, optimizer,
            epoch, opt.log_interval, device)
        val_loss, val_acc = val_epoch(
            encoder_cnn, decoder_rnn, val_loader, criterion, device)
        # saving weights to checkpoint
        # NOTE(review): tensorboard scalars are only written on checkpoint
        # epochs (every save_interval) — confirm that is intended.
        if (epoch) % opt.save_interval == 0:
            # scheduler.step(val_loss)
            # write summary
            summary_writer.add_scalar(
                'losses/train_loss', train_loss, global_step=epoch)
            summary_writer.add_scalar(
                'losses/val_loss', val_loss, global_step=epoch)
            summary_writer.add_scalar(
                'acc/train_acc', train_acc * 100, global_step=epoch)
            summary_writer.add_scalar(
                'acc/val_acc', val_acc * 100, global_step=epoch)
            state = {'epoch': epoch,
                     'encoder_state_dict': encoder_cnn.state_dict(),
                     'decoder_state_dict': decoder_rnn.state_dict(),
                     'optimizer_state_dict': optimizer.state_dict()}
            torch.save(state, os.path.join('snapshots', f'{opt.model}-Epoch-{epoch}-Loss-{val_loss}.pth'))
            print("Epoch {} model saved!\n".format(epoch))
def main():
    """Fine-tune a pretrained ResNet-101 on UCF data with SGD + plateau lr
    decay, then run the final test pass."""
    opt = set_opts()
    model = load_pretrained_resnet101(opt)
    train_loader, val_loader, test_loader, test_data = get_ucf_data(opt)
    criterion = nn.CrossEntropyLoss()
    if not opt.no_cuda:
        criterion = criterion.cuda()
    # get fine-tune parameters (we fine-tune all of them)
    parameters = get_fine_tuning_parameters(model, opt.ft_begin_index)
    optimizer = optim.SGD(parameters, lr=opt.learning_rate, momentum=opt.momentum,
                          dampening=opt.dampening, weight_decay=opt.weight_decay,
                          nesterov=opt.nesterov)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min',
                                               patience=opt.lr_patience)
    train_logger = Logger(os.path.join(opt.result_path, 'train.log'),
                          ['epoch', 'loss', 'acc', 'lr'])
    train_batch_logger = Logger(
        os.path.join(opt.result_path, 'train_batch.log'),
        ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])
    val_logger = Logger(os.path.join(opt.result_path, 'val.log'),
                        ['epoch', 'loss', 'acc'])
    # training
    for i in range(opt.begin_epoch, opt.n_epochs + 1):
        train_epoch(i, train_loader, model, criterion, optimizer, opt,
                    train_logger, train_batch_logger)
        validation_loss = val_epoch(i, val_loader, model, criterion, opt,
                                    val_logger)
        # Decay lr when the validation loss plateaus.
        scheduler.step(validation_loss)
    # testing
    test_results, all_output_buffer = final_test(test_loader, model, opt,
                                                 test_data.class_names)
def main(args):
    """Train one cross-validation fold, then evaluate on the test split and
    dump predictions, scores, and a classification report to the log dir."""
    mode = "evaluation" + str(args.fold)
    traindataloader, testdataloader, meta = get_dataloader(
        args.datapath, mode, args.batchsize, args.workers,
        level=args.level, preload_ram=args.preload_ram)
    num_classes = meta["num_classes"]
    ndims = meta["ndims"]
    sequencelength = meta["sequencelength"]
    print(f"Logging results to {args.logdir}")
    # Per-fold subdirectory.
    logdir = os.path.join(args.logdir, str(args.fold))
    os.makedirs(logdir, exist_ok=True)
    epochs, learning_rate, weight_decay = select_hyperparameter(args.model)
    device = torch.device(args.device)
    model = get_model(args.model, ndims, num_classes, sequencelength, device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate,
                                 weight_decay=weight_decay)
    model.modelname += f"_learning-rate={learning_rate}_weight-decay={weight_decay}"
    print(f"Initialized {model.modelname}")
    criterion = torch.nn.CrossEntropyLoss(reduction="mean")
    for epoch in range(epochs):
        print(f"train epoch {epoch}")
        train_epoch(model, optimizer, criterion, traindataloader, device)
    # Single evaluation after the final epoch.
    losses, y_true, y_pred, y_score, field_ids = test_epoch(
        model, criterion, dataloader=testdataloader, device=device)
    # Results nest one level deeper: <logdir>/<fold>/<model>.
    logdir = os.path.join(logdir, args.model)
    os.makedirs(logdir, exist_ok=True)
    print(f"saving results to {logdir}")
    print(sklearn.metrics.classification_report(y_true.cpu(), y_pred.cpu()),
          file=open(os.path.join(logdir, "classification_report.txt"), "w"))
    np.save(os.path.join(logdir, "y_pred.npy"), y_pred.cpu().numpy())
    np.save(os.path.join(logdir, "y_true.npy"), y_true.cpu().numpy())
    np.save(os.path.join(logdir, "y_score.npy"), y_score.cpu().numpy())
    np.save(os.path.join(logdir, "field_ids.npy"), field_ids.numpy())
    save(model, os.path.join(logdir, model.modelname + ".pth"))
def train(hidden_size, num_layers, lr, weight_decay):
    """Run one hyperparameter configuration for the 'germany' region: train for
    `epochs` epochs, snapshot the model each epoch, and always write the
    per-epoch stats CSV — even if training fails partway.

    Skips the run entirely if its log file already exists.
    """
    region = "germany"
    log_name = log_pattern.format(region=region, num_layers=num_layers,
                                  hidden_size=hidden_size, lr=lr,
                                  weight_decay=weight_decay)
    log_path = os.path.join(log_dir, log_name)
    if os.path.exists(log_path):
        print(f"{log_path} exists. skipping...")
        return
    # BUG FIX: `stats` was created inside the try block after setup(); if
    # setup() raised, the finally clause crashed with NameError and masked the
    # original exception. Define it before entering the try.
    stats = list()
    try:
        model, dataset, validdataset, dataloader, validdataloader, optimizer = setup(
            hidden_size, num_layers, lr, weight_decay)
        for epoch in range(epochs):
            trainloss = train_epoch(model, dataloader, optimizer, criterion, device)
            testmetrics, testloss = test_epoch(model, validdataloader, device,
                                               criterion, n_predictions=1)
            metric_msg = ", ".join([
                f"{name}={metric.compute():.2f}"
                for name, metric in testmetrics.items()
            ])
            msg = f"epoch {epoch}: train loss {trainloss:.2f}, test loss {testloss:.2f}, {metric_msg}"
            print(msg)
            #test_model(model, validdataset, device)
            model_name = name_pattern.format(region=region, num_layers=num_layers,
                                             hidden_size=hidden_size, lr=lr,
                                             weight_decay=weight_decay, epoch=epoch)
            pth = os.path.join(model_dir, model_name + ".pth")
            print(f"saving model snapshot to {pth}")
            snapshot(model, optimizer, pth)
            # Collect per-epoch metrics for the CSV log.
            stat = dict()
            stat["epoch"] = epoch
            for name, metric in testmetrics.items():
                stat[name] = metric.compute()
            stat["trainloss"] = trainloss.cpu().detach().numpy()
            stat["testloss"] = testloss.cpu().detach().numpy()
            stats.append(stat)
    finally:
        # Persist whatever was gathered, even on interruption/failure.
        df = pd.DataFrame(stats)
        df.to_csv(log_path)
        print(f"saving log to {log_path}")
def main():
    """Seed the RNGs, build the trainable model, and run curriculum training
    (supervised fraction decays linearly per epoch)."""
    random.seed(SEED)
    np.random.seed(SEED)
    model = get_trainable_model()
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    print('training')
    num_epochs = TRAIN_ITER // EPOCH_ITER
    for epoch in range(num_epochs):
        print('epoch', epoch)
        # Fully supervised at epoch 0, fading to adversarial-only.
        supervised_fraction = max(0.0, 1.0 - CURRICULUM_RATE * epoch)
        train.train_epoch(
            session,
            model,
            EPOCH_ITER,
            proportion_supervised=supervised_fraction,
            g_steps=4,
            d_steps=D_STEPS,
            next_sequence=get_random_sequence)
def main():
    """Seed the RNGs, build the trainable model, and run curriculum training
    with per-sequence verification."""
    random.seed(SEED)
    np.random.seed(SEED)
    model = get_trainable_model()
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    print('training')
    num_epochs = TRAIN_ITER // EPOCH_ITER
    for epoch in range(num_epochs):
        print('epoch', epoch)
        # Fully supervised at epoch 0, fading to adversarial-only.
        supervised_fraction = max(0.0, 1.0 - CURRICULUM_RATE * epoch)
        train.train_epoch(
            session,
            model,
            EPOCH_ITER,
            proportion_supervised=supervised_fraction,
            g_steps=1,
            d_steps=D_STEPS,
            next_sequence=get_random_sequence,
            verify_sequence=verify_sequence)
def main():
    """EM-style weakly-supervised training: alternate E-steps and M-steps over
    a staged 65-epoch schedule, saving weights after every epoch."""
    print('')
    print("training EM model")
    os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1"
    opt = parse_opts()
    torch.manual_seed(opt.manual_seed)
    model, parameters = generate_model(opt)
    optimizer = torch.optim.Adam(parameters, lr=opt.learning_rate)
    if not os.path.exists(opt.model_weight):
        os.mkdir(opt.model_weight)
    trainSet = weaklyDataset(opt.train_path)
    train_loader = DataLoader(trainSet, batch_size=1, shuffle=True, num_workers=0)
    E_step = False

    def adjust_learning_rate(optimizer):
        # Raise lr to 4x the base rate (applied during stage-2 warm-up below).
        for param_group in optimizer.param_groups:
            param_group['lr'] = opt.learning_rate*4

    # Schedule: epochs 1-10 stage 1 / M-step, 11-20 stage 1 / E-step,
    # 21-30 stage 2 / M-step with raised lr, 31+ stage 2 alternating E/M.
    for epoch in range(1, 66):
        if epoch <= 10:
            stage = 1
            E_step = False
        elif epoch > 10 and epoch <= 20:
            stage = 1
            E_step = True
        elif epoch > 20 and epoch <= 30:
            stage = 2
            E_step = False
            adjust_learning_rate(optimizer)
        else:
            stage = 2
            E_step = not E_step
        train_epoch(epoch, train_loader, model, optimizer, opt, E_step, stage)
        torch.save(model.state_dict(), opt.model_weight+"/{}.pt".format(epoch))
    return
def test_train_one_epoch(self):
    """After one training epoch, loss/accuracy must clear the expected floors
    on both the train metrics and the held-out evaluation."""
    train_ds, test_ds = train.get_datasets()
    input_rng = onp.random.RandomState(0)
    mdl = train.create_model(random.PRNGKey(0))
    opt = train.create_optimizer(mdl, 0.1, 0.9)
    # One epoch at batch size 128.
    opt, metrics = train.train_epoch(opt, train_ds, 128, 0, input_rng)
    self.assertLessEqual(metrics['loss'], 0.27)
    self.assertGreaterEqual(metrics['accuracy'], 0.92)
    eval_loss, eval_accuracy = train.eval_model(opt.target, test_ds)
    self.assertLessEqual(eval_loss, 0.06)
    self.assertGreaterEqual(eval_accuracy, 0.98)
def train(self, a, b, method='resume', threshold=0.7):
    """Train on examples [a, b) repeatedly until the fraction of correct
    predictions exceeds `threshold`; returns that final fraction.

    Adjusts self.r (a rate used by train_epoch) downward as accuracy improves.
    """
    i = 0
    self.r = 1
    while True:
        i += 1
        # FIX: converted Python 2 print statements to the print() function for
        # consistency with the rest of this file (which uses Python 3 syntax);
        # the emitted output is byte-identical.
        print("Epoch ", i)
        correct = train.train_epoch(self, a, b, method=method)
        p_correct = float(correct) / (b - a)
        print(": %", p_correct, " correct")
        if p_correct > threshold:
            break
        # Shrink the rate as accuracy rises (32 at 0%, ~0.03 near 100%).
        self.r = 32 / (1 + 1024*p_correct)
    return p_correct
def train(self, a, b, method='resume', threshold=0.7):
    """Train on examples [a, b) repeatedly until the fraction of correct
    predictions exceeds `threshold`; returns that final fraction.

    Adjusts self.r (a rate used by train_epoch) downward as accuracy improves.
    NOTE(review): this is a near-duplicate of the sibling `train` above
    (whitespace-only difference) — consider deduplicating.
    """
    i = 0
    self.r = 1
    while True:
        i += 1
        # FIX: converted Python 2 print statements to the print() function for
        # consistency with the rest of this file (which uses Python 3 syntax);
        # the emitted output is byte-identical.
        print("Epoch ", i)
        correct = train.train_epoch(self, a, b, method=method)
        p_correct = float(correct) / (b - a)
        print(": %", p_correct, " correct")
        if p_correct > threshold:
            break
        # Shrink the rate as accuracy rises (32 at 0%, ~0.03 near 100%).
        self.r = 32 / (1 + 1024 * p_correct)
    return p_correct
def main():
    """Load several checkpoints, average their models' parameters, evaluate the
    averaged model on the test set, and save it as a new checkpoint."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-read_test_dir', required=True)
    parser.add_argument('-read_vocab_file', required=True)
    parser.add_argument('-load_model_dir', required=True)
    parser.add_argument('-load_model_file_list', required=True, nargs='+')
    parser.add_argument('-save_model_dir', required=True)
    parser.add_argument('-use_gpu', action='store_true')
    opt = parser.parse_args()
    print('[PROCEDURE] combining model with model averaging...')
    models = []
    for file in opt.load_model_file_list:
        # map_location keeps tensors on CPU regardless of where they were saved.
        checkpoint = torch.load(opt.load_model_dir + '/' + file,
                                map_location=lambda storage, loc: storage)
        train_options = checkpoint['train_options']
        models.append(checkpoint['model'])
        print('[INFO] model loaded')
    print('[INFO] reading test data...')
    batch_size = 96  # dev384/test187
    test_data = train.initialize_batch_loader(opt.read_test_dir + '/feats.scp',
                                              opt.read_test_dir + '/text',
                                              opt.read_vocab_file, batch_size)
    print('[INFO] batch loader is initialized')
    vocab_size = len(torch.load(opt.read_vocab_file))
    crit = train.get_criterion(vocab_size)
    if opt.use_gpu:
        crit = crit.cuda()
    print('[INFO] using cross entropy loss.')
    #---------------------------------------------------------------------------------------------------------------------
    # (disabled) per-model evaluation before averaging:
    '''
    for model in models:
        if opt.use_gpu:
            model = model.cuda()
        start = time.time()
        test_loss, test_accu = train.train_epoch(model, test_data, crit, mode = 'eval', use_gpu = opt.use_gpu)
        print('[INFO]-----(evaluating test set)----- ppl: {:7.3f}, accuracy: {:3.2f} %, elapse: {:3.2f} min'
              .format(math.exp(min(test_loss, 100)), 100*test_accu, (time.time()-start)/60))
    '''
    # Element-wise average of all loaded models' parameters.
    model = sum_average_model(models)
    if opt.use_gpu:
        model = model.cuda()
    start = time.time()
    test_loss, test_accu = train.train_epoch(model, test_data, crit,
                                             mode = 'eval', use_gpu = opt.use_gpu)
    print('[INFO]-----(evaluating combining set)----- ppl: {:7.3f}, accuracy: {:3.2f} %, elapse: {:3.2f} min'
          .format(math.exp(min(test_loss, 100)), 100*test_accu,
                  (time.time()-start)/60))
    model_name = opt.save_model_dir + '/combined.accu{:3.2f}.torch'.format(100*test_accu)
    # NOTE(review): reuses the checkpoint dict from the *last* loop iteration as
    # the container for the averaged model — confirm that is intended.
    checkpoint['model'] = model
    torch.save(checkpoint, model_name)
def main():
    """Assemble model, loss, optimizer, and train/validation pipelines, then
    alternate one training and one validation epoch for n_epochs."""
    opt = parse_opts()
    opt.device_ids = list(range(device_count()))
    local2global_path(opt)
    model, parameters = generate_model(opt)
    criterion = get_loss(opt).cuda()
    optimizer = get_optim(opt, parameters)
    writer = SummaryWriter(logdir=opt.log_path)

    # training pipeline
    train_spatial = get_spatial_transform(opt, 'train')
    train_temporal = TSN(seq_len=opt.seq_len,
                         snippet_duration=opt.snippet_duration, center=False)
    train_target = ClassLabel()
    training_data = get_training_set(opt, train_spatial, train_temporal,
                                     train_target)
    train_loader = get_data_loader(opt, training_data, shuffle=True)

    # validation pipeline
    val_spatial = get_spatial_transform(opt, 'test')
    val_temporal = TSN(seq_len=opt.seq_len,
                       snippet_duration=opt.snippet_duration, center=False)
    val_target = ClassLabel()
    validation_data = get_validation_set(opt, val_spatial, val_temporal,
                                         val_target)
    val_loader = get_data_loader(opt, validation_data, shuffle=False)

    for epoch in range(1, opt.n_epochs + 1):
        train_epoch(epoch, train_loader, model, criterion, optimizer, opt,
                    training_data.class_names, writer)
        val_epoch(epoch, val_loader, model, criterion, opt, writer, optimizer)
    writer.close()
def run(opts):
    """Prepare graph/resource data, train the scheduling model with exponential
    lr decay, and checkpoint the best-validating model (optional)."""
    # Set the random seed
    torch.manual_seed(opts.seed)
    random.seed(opts.seed)
    # Set the device
    opts.device = torch.device(
        f'cuda:{opts.gpu_id}' if opts.use_cuda else 'cpu')
    # Load and prepare data
    train_graphs = load_graphs(dirname=opts.train_dsp_dataset_dir)
    valid_graphs = load_graphs(dirname=opts.valid_dsp_dataset_dir)
    resources = load_resources(opts.communicate_costs,
                               dirname=opts.res_dataset_dir)
    train_data = build_samples(train_graphs, resources, opts)
    valid_data = build_samples(valid_graphs, resources, opts)
    # train_data, valid_data = data_split(total_data, opts.train_ratio, shuffle=True)
    build_feature(train_data, is_train=True)
    build_feature(valid_data, is_train=False)
    train_data = data_augment(train_data, opts.train_batch_size)
    # Initialize model
    model = Model(opts.op_dim, opts.slot_dim, opts.edge_dim, opts.embed_dim,
                  opts.dsp_conv_iter, opts.res_conv_iter, opts.dsp_gcn_aggr,
                  opts.res_gcn_aggr, opts.gcn_act, opts.rnn_type,
                  opts.tanh_clip).to(opts.device)
    optimizer = optim.Adam(model.parameters(), lr=opts.lr)
    # Exponential decay: lr * lr_decay**epoch.
    lr_scheduler = optim.lr_scheduler.LambdaLR(
        optimizer, lambda epoch: opts.lr_decay**epoch)
    if opts.save_model:
        # Timestamped checkpoint directory under opts.model_dir.
        model_dir = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
        os.mkdir(os.path.join(opts.model_dir, model_dir))
    best_avg_reward = -1
    for epoch in range(1, opts.epochs + 1):
        valid_avg_reward = train_epoch(train_data, valid_data, model,
                                       optimizer, lr_scheduler, epoch, opts)
        if opts.save_model and epoch > opts.save_model_epoch_threshold and valid_avg_reward > best_avg_reward:
            best_avg_reward = valid_avg_reward
            # BUG FIX: the checkpoint directory is created under opts.model_dir
            # above, but the save path hard-coded the literal 'model/' root, so
            # saving failed (or went elsewhere) whenever opts.model_dir was not
            # 'model'. Join the same root as the mkdir call.
            torch.save(model, os.path.join(opts.model_dir, model_dir,
                                           'best_model.pt'))
# Create loss criterion & optimizer # criterion = nn.CrossEntropyLoss() criterion = LabelSmoothingCrossEntropy() optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay) scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=7, threshold=0.0001) # Start training if phase == 'Train': logger.info("Training Started".center(60, '#')) for epoch in range(epochs): current_lr = get_lr(optimizer) if epoch == 15: for param_group in optimizer.param_groups: param_group['lr'] = current_lr * 0.1 print('lr: ', get_lr(optimizer)) # Train the model train_epoch(model, criterion, optimizer, train_loader, device, epoch, logger, log_interval, writer) # Validate the model val_loss = val_epoch(model, criterion, val_loader, device, epoch, logger, writer) # scheduler.step(val_loss) # Save model torch.save(model.state_dict(), os.path.join(model_path, "sign_resnet2d+1_epoch{:03d}.pth".format(epoch+1))) logger.info("Epoch {} Model Saved".format(epoch+1).center(60, '#')) elif phase == 'Test': logger.info("Testing Started".center(60, '#')) val_loss = val_epoch(model, criterion, val_loader, device, 0, logger, writer, phase=phase, exp_name=exp_name) logger.info("Finished".center(60, '#'))
def create_3d_resnet(ema=False, num_classes=101):
    """NOTE(review): the body of this function is missing from this view of the
    file — only the signature survived extraction. Restore it from version
    control before relying on this module."""

if __name__ == '__main__':
    args = opts.parse_opts()
    if not os.path.exists(args.result_path):
        os.makedirs(args.result_path)
    # for key in cfg.keys():
    #     print('{}: {}'.format(key, cfg[key]))
    # if not os.path.exists(os.path.join(args.result_path, 'config.py')):
    #     shutil.copyfile('./config.py', os.path.join(args.result_path, 'config.py'))
    # Build the multi-scale list: scales[i+1] = scales[i] * scales_step.
    args.scales = [args.initial_scale]
    for i in range(1, args.n_scales):
        args.scales.append(args.scales[-1] * args.scales_step)
    args.arch = 'resnet18'
    args.mean = get_mean(1, dataset='activitynet')
    args.std = get_std(args.norm_value)
    print(args)
    with open(os.path.join(args.result_path, 'args.json'), 'w') as args_file:
        json.dump(vars(args), args_file)
    torch.manual_seed(args.manual_seed)
    # writer = SummaryWriter(log_dir='./results')
    train_batch_logger = Logger(os.path.join(args.result_path, args.pth_name + '_' + 'train_batch.log'),
                                ['epoch', 'batch', 'iter', 'class_loss', 'consistency_loss', 'prec1', 'ema_prec1', 'lr'])
    train_epoch_logger = Logger(os.path.join(args.result_path, args.pth_name + '_' + 'train_epoch.log'),
                                ['epoch', 'class_loss', 'consistency_loss', 'prec1', 'ema_prec1'])
    val_logger = Logger(os.path.join(args.result_path, args.pth_name + '_' + 'val.log'),
                        ['epoch', 'loss', 'prec1'])
    student_model = create_model().cuda()  # student
    ema_model = create_model(ema=True).cuda()  # teacher
    train_set, val_set, classes = prepare_cifar10(args.dataset_root)
    train_loader, val_loader = sample_train(train_set, val_set, len(classes), args)
    # classification error is ignored for unlabeled samples, but averaged by whole batch, not just labeled samples
    class_criterion = nn.CrossEntropyLoss(ignore_index=args.NO_LABEL, reduction='sum').cuda()
    if args.consistency_type == 'mse':
        consistency_criterion = softmax_mse_loss
    elif args.consistency_type == 'kl':
        consistency_criterion = softmax_kl_loss
    else:
        consistency_criterion = None
        # NOTE(review): this message was split by a garbled line break in the
        # extracted source; reconstructed with a single space — verify.
        exit('wrong consistency type! Check config file!')
    criterion = {'classification': class_criterion, 'consistency': consistency_criterion}
    optimizer = torch.optim.SGD(student_model.parameters(), args.init_lr,
                                momentum=0.9, weight_decay=args.weight_decay, nesterov=True)
    best_prec1 = 0
    for epoch in range(args.num_epochs):
        train_epoch(epoch, student_model, ema_model, train_loader, optimizer,
                    criterion, train_batch_logger, train_epoch_logger, args)
        # Unconditional snapshot after training, before validation.
        state = {'epoch': epoch, 'state_dict': student_model.state_dict(),
                 'ema_state_dict': ema_model.state_dict(),
                 'optimizer': optimizer.state_dict(), 'best_prec1': best_prec1}
        save_checkpoint(state, False, args.result_path, args.pth_name)
        validation_loss, prec1 = validate_epoch(epoch, student_model, val_loader,
                                                criterion, val_logger, args)
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        state = {'epoch': epoch, 'state_dict': student_model.state_dict(),
                 'ema_state_dict': ema_model.state_dict(),
                 'best_prec1': best_prec1, 'optimizer': optimizer.state_dict()}
        save_checkpoint(state, is_best, args.result_path, args.pth_name)
def _run_rl(opts):
    """Train (or evaluate) an attention/pointer model with REINFORCE.

    Builds the model and encoder from `opts`, optionally restores weights,
    optimizer state and RNG state from a checkpoint, sets up a baseline for
    the policy gradient, then runs `train_epoch` for `opts.n_epochs` epochs.

    NOTE(review): extract is whitespace-mangled; reformatted, tokens unchanged.
    """
    # Pretty print the run args
    pp.pprint(vars(opts))
    # Set the random seed
    torch.manual_seed(opts.seed)
    # Optionally configure tensorboard
    tb_logger = None
    if not opts.no_tensorboard:
        tb_logger = TbLogger(
            os.path.join(opts.log_dir,
                         "{}_{}".format(opts.problem, opts.graph_size),
                         opts.run_name))
    os.makedirs(opts.save_dir)
    # Save arguments so exact configuration can always be found
    with open(os.path.join(opts.save_dir, "args.json"), 'w') as f:
        json.dump(vars(opts), f, indent=True)
    # Set the device
    opts.device = torch.device("cuda:0" if opts.use_cuda else "cpu")
    # Figure out what's the problem
    problem = load_problem(opts.problem)
    # Load data from load_path
    load_data = {}
    assert opts.load_path is None or opts.resume is None, "Only one of load path and resume can be given"
    load_path = opts.load_path if opts.load_path is not None else opts.resume
    if load_path is not None:
        print(' [*] Loading data from {}'.format(load_path))
        load_data = torch_load_cpu(load_path)
    # Initialize model
    model_class = {
        'attention': AttentionModel,
        'pointer': PointerNetwork
    }.get(opts.model, None)
    assert model_class is not None, "Unknown model: {}".format(model_class)
    encoder_class = {
        'gat': GraphAttentionEncoder,
        'gcn': GCNEncoder,
        'mlp': MLPEncoder
    }.get(opts.encoder, None)
    assert encoder_class is not None, "Unknown encoder: {}".format(
        encoder_class)
    model = model_class(opts.embedding_dim,
                        opts.hidden_dim,
                        problem,
                        encoder_class,
                        n_encode_layers=opts.n_encode_layers,
                        mask_inner=True,
                        mask_logits=True,
                        normalization=opts.normalization,
                        tanh_clipping=opts.tanh_clipping,
                        checkpoint_encoder=opts.checkpoint_encoder,
                        shrink_size=opts.shrink_size).to(opts.device)
    if opts.use_cuda and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)
    # Compute number of network parameters
    print(model)
    nb_param = 0
    for param in model.parameters():
        nb_param += np.prod(list(param.data.size()))
    print('Number of parameters: ', nb_param)
    # Overwrite model parameters by parameters to load
    model_ = get_inner_model(model)
    model_.load_state_dict({
        **model_.state_dict(),
        **load_data.get('model', {})
    })
    # Initialize baseline
    if opts.baseline == 'exponential':
        baseline = ExponentialBaseline(opts.exp_beta)
    elif opts.baseline == 'critic' or opts.baseline == 'critic_lstm':
        assert problem.NAME == 'tsp', "Critic only supported for TSP"
        baseline = CriticBaseline(
            (CriticNetworkLSTM(2, opts.embedding_dim, opts.hidden_dim,
                               opts.n_encode_layers, opts.tanh_clipping)
             if opts.baseline == 'critic_lstm' else CriticNetwork(
                 encoder_class, 2, opts.embedding_dim, opts.hidden_dim,
                 opts.n_encode_layers, opts.normalization)).to(opts.device))
    elif opts.baseline == 'rollout':
        baseline = RolloutBaseline(model, problem, opts)
    else:
        assert opts.baseline is None, "Unknown baseline: {}".format(
            opts.baseline)
        baseline = NoBaseline()
    if opts.bl_warmup_epochs > 0:
        # Blend in the real baseline gradually over the warmup epochs.
        baseline = WarmupBaseline(baseline,
                                  opts.bl_warmup_epochs,
                                  warmup_exp_beta=opts.exp_beta)
    # Load baseline from data, make sure script is called with same type of baseline
    if 'baseline' in load_data:
        baseline.load_state_dict(load_data['baseline'])
    # Initialize optimizer
    optimizer = optim.Adam([{
        'params': model.parameters(),
        'lr': opts.lr_model
    }] + ([{
        'params': baseline.get_learnable_parameters(),
        'lr': opts.lr_critic
    }] if len(baseline.get_learnable_parameters()) > 0 else []))
    # Load optimizer state
    if 'optimizer' in load_data:
        optimizer.load_state_dict(load_data['optimizer'])
        # Move restored optimizer tensors onto the active device.
        for state in optimizer.state.values():
            for k, v in state.items():
                # if isinstance(v, torch.Tensor):
                if torch.is_tensor(v):
                    state[k] = v.to(opts.device)
    # Initialize learning rate scheduler, decay by lr_decay once per epoch!
    lr_scheduler = optim.lr_scheduler.LambdaLR(
        optimizer, lambda epoch: opts.lr_decay**epoch)
    # Start the actual training loop
    val_dataset = problem.make_dataset(size=opts.graph_size,
                                       num_samples=opts.val_size,
                                       filename=opts.val_dataset)
    # Dataset may contain fewer samples than requested; record actual size.
    opts.val_size = val_dataset.size
    if opts.resume:
        # Recover the epoch number from the checkpoint filename ("...-N.pt").
        epoch_resume = int(
            os.path.splitext(os.path.split(opts.resume)[-1])[0].split("-")[1])
        torch.set_rng_state(load_data['rng_state'])
        if opts.use_cuda:
            torch.cuda.set_rng_state_all(load_data['cuda_rng_state'])
        # Set the random states
        # Dumping of state was done before epoch callback, so do that now (model is loaded)
        baseline.epoch_callback(model, epoch_resume)
        print("Resuming after {}".format(epoch_resume))
        opts.epoch_start = epoch_resume + 1
    if opts.eval_only:
        validate(model, val_dataset, opts)
    else:
        for epoch in range(opts.epoch_start, opts.epoch_start + opts.n_epochs):
            train_epoch(model, optimizer, baseline, lr_scheduler, epoch,
                        val_dataset, problem, tb_logger, opts)
if 'baseline' in load_data: baseline.load_state_dict(load_data['baseline']) # Initialize optimizer optimizer = optim.Adam([{ 'params': model.parameters(), 'lr': float(opts.lr_model) }] + ([{ 'params': baseline.get_learnable_parameters(), 'lr': float(opts.lr_critic) }] if len(baseline.get_learnable_parameters()) > 0 else [])) # Load optimizer state if 'optimizer' in load_data: optimizer.load_state_dict(load_data['optimizer']) # Initialize learning rate scheduler, decay by lr_decay once per epoch! lr_scheduler = optim.lr_scheduler.LambdaLR( optimizer, lambda epoch: opts.lr_decay**epoch) # Start the actual training loop val_dataset = problem.make_dataset(size=opts.graph_size, num_samples=opts.val_size) if opts.eval_only: validate(model, val_dataset, opts) else: for epoch in range(opts.epoch_start, opts.epoch_start + opts.n_epochs): train_epoch(model, optimizer, baseline, lr_scheduler, epoch, val_dataset, problem, opts)
# NOTE(review): fragment -- begins with the dangling tail of a truncated
# Logger(...) construction; `opt`, `model`, `optimizer`, `scheduler`, the
# loaders/loggers and `norm_method` come from code not visible in this
# extract. Reformatted only; tokens unchanged.
    os.path.join(opt.result_path, 'val.log'), ['epoch', 'loss', 'acc'])
# Optionally resume from a checkpoint file.
if opt.resume_path:
    print('loading checkpoint {}'.format(opt.resume_path))
    checkpoint = torch.load(opt.resume_path)
    # Refuse to resume a checkpoint from a different architecture.
    assert opt.arch == checkpoint['arch']
    opt.begin_epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['state_dict'])
    if not opt.no_train:
        optimizer.load_state_dict(checkpoint['optimizer'])
print('run')
for i in range(opt.begin_epoch, opt.n_epochs + 1):
    if not opt.no_train:
        train_epoch(i, train_loader, model, criterion, optimizer, opt,
                    train_logger, train_batch_logger)
    if not opt.no_val:
        validation_loss = val_epoch(i, val_loader, model, criterion, opt,
                                    val_logger)
    if not opt.no_train and not opt.no_val:
        # Scheduler is stepped with the validation loss -- presumably a
        # ReduceLROnPlateau constructed outside this extract; confirm.
        scheduler.step(validation_loss)
if opt.test:
    # Test-time preprocessing: scaled corner crop, tensor conversion, norm.
    spatial_transform = Compose([
        Scale(int(opt.sample_size / opt.scale_in_test)),
        CornerCrop(opt.sample_size, opt.crop_position_in_test),
        ToTensor(opt.norm_value), norm_method
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    target_transform = VideoID()
# NOTE(review): fragment -- opens with the dangling tail of a truncated
# DataLoader construction; `train_loader`, `test_loader`, `train_epoch` and
# `test` are defined in code not visible here. Reformatted only; tokens
# unchanged.
    root_path=root_path), batch_size=100)
net = ResNeXt(10)
# net = SKNet(10)
net.cuda()
optimizer = optim.Adam(net.parameters(), weight_decay=1e-5, betas=(0.9, 0.999))
criterion = nn.CrossEntropyLoss().cuda()
log_path = './logs/'
writer = SummaryWriter(log_path)
epoch_num = 300
lr0 = 1e-3
for epoch in range(epoch_num):
    # Manual step decay: halve the base LR every 50 epochs, overriding Adam's
    # per-group lr each epoch.
    current_lr = lr0 / 2**int(epoch / 50)
    for param_group in optimizer.param_groups:
        param_group['lr'] = current_lr
    train_epoch(net, optimizer, train_loader, criterion, epoch, writer=writer)
    test(net, test_loader, criterion, epoch, writer=writer)
    # Numbered snapshot every 5 epochs.
    if (epoch + 1) % 5 == 0:
        torch.save(net.state_dict(),
                   os.path.join('./model/model_{}.pth'.format(epoch)))
# Final weights -- placement after the loop is assumed from the collapsed
# formatting; confirm against the original file.
torch.save(net.state_dict(), os.path.join('./model/model.pth'))
['epoch', 'loss', 'acc']) if opt.resume_path: print('loading checkpoint {}') # .format(opt.resume_path)) checkpoint = torch.load(opt.resume_path) assert opt.arch == checkpoint['arch'] opt.begin_epoch = checkpoint['epoch'] model.load_state_dict(checkpoint['state_dict']) if not opt.no_train: optimizer.load_state_dict(checkpoint['optimizer']) print('run') for i in range(opt.begin_epoch, opt.n_epochs + 1): if not opt.no_train: train_epoch(i, train_loader, model, criterion, optimizer, opt, train_logger, train_batch_logger) if not opt.no_val: validation_loss = val_epoch(i, val_loader, model, criterion, opt, val_logger) if not opt.no_train and not opt.no_val: scheduler.step(validation_loss) if opt.test: spatial_transform = Compose([ Scale(int(opt.sample_size / opt.scale_in_test)), CornerCrop(opt.sample_size, opt.crop_position_in_test), ToTensor(opt.norm_value), norm_method ]) temporal_transform = LoopPadding(opt.sample_duration) target_transform = VideoID()
# NOTE(review): fragment -- `opt`, `model`, `criterion`, the loaders/loggers,
# the GAN components (netD/netG, optimizerD/G, criterion2/criterion3) and the
# visdom handles (viz, train_lot, val_lot) are created in code not visible in
# this extract. Reformatted only; tokens unchanged.
if opt.resume_path:
    print('loading checkpoint {}'.format(opt.resume_path))
    checkpoint = torch.load(opt.resume_path)
    # Refuse to resume a checkpoint from a different architecture.
    assert opt.arch == checkpoint['arch']
    opt.begin_epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['state_dict'])
    if not opt.no_train:
        optimizer.load_state_dict(checkpoint['optimizer'])
print('run')
for i in range(opt.begin_epoch, opt.n_epochs + 1):
    if not opt.no_train:
        # Classifier is trained jointly with discriminator/generator heads.
        train_loss, train_acc = train_epoch(
            i, train_loader, model, criterion, optimizer, opt, train_logger,
            train_batch_logger, viz, train_lot, netD, optimizerD, criterion2,
            netG, optimizerG, criterion3)
    if not opt.no_val:
        validation_loss, validation_acc = val_epoch(
            i, val_loader, model, criterion, opt, val_logger)
        # Nesting of this plot under the validation branch is assumed from the
        # collapsed formatting (it reads validation_loss/_acc); confirm.
        if opt.visdom:
            # Append (loss, acc*10) for epoch i to the validation visdom line.
            viz.line(X=torch.ones((1, 2)).cpu() * (i - 1),
                     Y=torch.Tensor(
                         [[validation_loss, validation_acc * 10]]),
                     win=val_lot,
                     update='append')
# ===================================================================================
# NOTE(review): the body of this test branch is truncated by the extract.
if opt.test:
def run(opts):
    """Train (or evaluate) an attention/pointer model on a graph problem.

    Builds the model from `opts`, optionally restores model/optimizer/RNG
    state from a checkpoint, sets up a REINFORCE baseline, runs `train_epoch`
    for `opts.n_epochs` epochs and appends a one-line run summary to
    "experiments.log".

    NOTE(review): extract is whitespace-mangled; reformatted, tokens unchanged.
    """
    # Pretty print the run args
    pp.pprint(vars(opts))
    # Set the random seed
    torch.manual_seed(opts.seed)
    # Optionally configure tensorboard
    tb_logger = None
    if not opts.no_tensorboard:
        tb_logger = TbLogger(
            os.path.join(opts.log_dir,
                         "{}_{}".format(opts.problem, opts.graph_size),
                         opts.run_name))
    os.makedirs(opts.save_dir)
    # Save arguments so exact configuration can always be found
    with open(os.path.join(opts.save_dir, "args.json"), 'w') as f:
        json.dump(vars(opts), f, indent=True)
    # Set the device
    opts.device = torch.device("cuda:0" if opts.use_cuda else "cpu")
    # Figure out what's the problem
    problem = load_problem(opts.problem)
    # Load data from load_path
    load_data = {}
    assert opts.load_path is None or opts.resume is None, "Only one of load path and resume can be given"
    load_path = opts.load_path if opts.load_path is not None else opts.resume
    if load_path is not None:
        print(' [*] Loading data from {}'.format(load_path))
        load_data = torch_load_cpu(load_path)
    # Initialize model
    model_class = {
        'attention': AttentionModel,
        'pointer': PointerNetwork
    }.get(opts.model, None)
    assert model_class is not None, "Unknown model: {}".format(model_class)
    model = model_class(opts.embedding_dim,
                        opts.hidden_dim,
                        problem,
                        n_encode_layers=opts.n_encode_layers,
                        mask_inner=True,
                        mask_logits=True,
                        normalization=opts.normalization,
                        tanh_clipping=opts.tanh_clipping,
                        checkpoint_encoder=opts.checkpoint_encoder,
                        shrink_size=opts.shrink_size,
                        steps=opts.awe_steps,
                        graph_size=opts.graph_size).to(opts.device)
    if opts.use_cuda and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)
    # Overwrite model parameters by parameters to load
    model_ = get_inner_model(model)
    model_.load_state_dict({
        **model_.state_dict(),
        **load_data.get('model', {})
    })
    # Initialize baseline
    if opts.baseline == 'exponential':
        baseline = ExponentialBaseline(opts.exp_beta)
    elif opts.baseline == 'constant':
        baseline = ConstantBaseline()
    elif opts.baseline == 'critic' or opts.baseline == 'critic_lstm':
        assert problem.NAME == 'tsp', "Critic only supported for TSP"
        baseline = CriticBaseline(
            (CriticNetworkLSTM(2, opts.embedding_dim, opts.hidden_dim,
                               opts.n_encode_layers, opts.tanh_clipping)
             if opts.baseline == 'critic_lstm' else CriticNetwork(
                 2, opts.embedding_dim, opts.hidden_dim, opts.n_encode_layers,
                 opts.normalization)).to(opts.device))
    elif opts.baseline == 'rollout':
        baseline = RolloutBaseline(model, problem, opts)
    elif opts.baseline == 'critic_lp':
        assert problem.NAME == 'lp'
        # Critic input size per walk length -- presumably the number of
        # distinct anonymous walks for opts.awe_steps; confirm upstream.
        dim_vocab = {2: 2, 3: 5, 4: 15, 5: 52, 6: 203, 7: 877, 8: 4140}
        baseline = CriticBaseline(
            (CriticNetworkLP(dim_vocab[opts.awe_steps], opts.embedding_dim,
                             opts.hidden_dim, opts.n_encode_layers,
                             opts.normalization)).to(opts.device))
    else:
        assert opts.baseline is None, "Unknown baseline: {}".format(
            opts.baseline)
        baseline = NoBaseline()
    if opts.bl_warmup_epochs > 0:
        # Blend in the real baseline gradually over the warmup epochs.
        baseline = WarmupBaseline(baseline,
                                  opts.bl_warmup_epochs,
                                  warmup_exp_beta=opts.exp_beta)
    # Load baseline from data, make sure script is called with same type of baseline
    if 'baseline' in load_data:
        baseline.load_state_dict(load_data['baseline'])
    # Initialize optimizer
    optimizer = optim.Adam([{
        'params': model.parameters(),
        'lr': opts.lr_model
    }] + ([{
        'params': baseline.get_learnable_parameters(),
        'lr': opts.lr_critic
    }] if len(baseline.get_learnable_parameters()) > 0 else []))
    # Load optimizer state
    if 'optimizer' in load_data:
        optimizer.load_state_dict(load_data['optimizer'])
        # Move restored optimizer tensors onto the active device.
        for state in optimizer.state.values():
            for k, v in state.items():
                # if isinstance(v, torch.Tensor):
                if torch.is_tensor(v):
                    state[k] = v.to(opts.device)
    # Initialize learning rate scheduler, decay by lr_decay once per epoch!
    lr_scheduler = optim.lr_scheduler.LambdaLR(
        optimizer, lambda epoch: opts.lr_decay**epoch)
    # Start the actual training loop
    val_dataset = problem.make_dataset(num_samples=opts.val_size,
                                       filename=opts.val_dataset,
                                       distribution=opts.data_distribution,
                                       size=opts.graph_size,
                                       degree=opts.degree,
                                       steps=opts.awe_steps,
                                       awe_samples=opts.awe_samples)
    if opts.resume:
        # Recover the epoch number from the checkpoint filename ("...-N.pt").
        epoch_resume = int(
            os.path.splitext(os.path.split(opts.resume)[-1])[0].split("-")[1])
        torch.set_rng_state(load_data['rng_state'])
        if opts.use_cuda:
            torch.cuda.set_rng_state_all(load_data['cuda_rng_state'])
        # Set the random states
        # Dumping of state was done before epoch callback, so do that now (model is loaded)
        baseline.epoch_callback(model, epoch_resume)
        print("Resuming after {}".format(epoch_resume))
        opts.epoch_start = epoch_resume + 1
    if opts.eval_only:
        validate(model, val_dataset, opts)
    else:
        # `extra` is mutated by train_epoch to track the best reward/epoch.
        extra = {'updates': 0, 'avg_reward': 10**8, "best_epoch": -1}
        start = time.time()
        for epoch in range(opts.epoch_start, opts.epoch_start + opts.n_epochs):
            train_epoch(model, optimizer, baseline, lr_scheduler, epoch,
                        val_dataset, problem, tb_logger, opts, extra)
        finish = time.time()
        # Append a one-line summary of this run to a shared experiments log.
        with open("experiments.log", "a+") as f:
            f.write("{} {:.4f} {} {:.2f}\n".format(
                '-'.join(opts.train_dataset.split('/')[-2:]),
                extra["avg_reward"], extra["best_epoch"], finish - start))
        print("Took {:.2f} sec for {} epochs".format(finish - start,
                                                     opts.n_epochs))
test_loader = torch.utils.data.DataLoader(dataset('test'), batch_size=64) # net = SimpleNet_Bin(10) net = SimpleNet(10) net.cuda() optimizer = optim.Adam(net.parameters(), lr=1e-2, weight_decay=1e-6, betas=(0.9, 0.999)) criterion = nn.CrossEntropyLoss().cuda() criterion_test = nn.CrossEntropyLoss(reduction='sum').cuda() log_path = 'logs/bin' writer = SummaryWriter(log_dir=log_path) epoch_num = 20 lr0 = 1e-4 for epoch in range(epoch_num): current_lr = lr0 / 2**int(epoch / 4) for param_group in optimizer.param_groups: param_group["lr"] = current_lr # train_epoch(net, optimizer, train_loader, criterion, epoch, writer, current_lr=current_lr, mode='Bin') train_epoch(net, optimizer, train_loader, criterion, epoch, writer, current_lr=current_lr, mode='normal') test(net, test_loader, criterion_test, epoch, writer)
def objective(dropout):
    """Hyperopt-style objective: train conv1d_nn.Net with the given dropout.

    Trains for 200 epochs on the LFP dataset and returns a dict in the shape
    hyperopt expects: ``loss`` is the negated final validation accuracy (so
    minimizing it maximizes accuracy), plus ``status`` and ``val_loss``.

    FIXES vs. original:
    - the per-epoch log line had misaligned %-format labels (the "(%s)" slot
      received train_loss and every label was shifted by one argument);
    - it printed ``epoch + 1`` even though the loop already starts at 1,
      overstating the epoch number by one.

    :param dropout: dropout rate forwarded to conv1d_nn.Net.
    :return: {'loss': -val_accuracy, 'status': STATUS_OK, 'val_loss': ...}
    """
    global count  # global trial counter shared across objective() calls
    count += 1
    print(
        '-------------------------------------------------------------------')
    print('%d' % count)
    print(dropout)
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    torch.backends.cudnn.benchmark = True
    root_path = '/data/eaxfjord/deep_LFP'
    matrix = 'shuffled_LR.npy'
    batch_size = 20
    training_dataset = LFP_data(root_path=root_path, data_file=matrix,
                                split='train')
    training_loader = DataLoader(training_dataset, shuffle=True,
                                 batch_size=batch_size, pin_memory=True,
                                 num_workers=1)
    validation_set = LFP_data(root_path=root_path, data_file=matrix,
                              split='valid')
    validation_loader = DataLoader(validation_set, shuffle=False,
                                   batch_size=batch_size, pin_memory=True,
                                   num_workers=1)
    input_shape = (2, 2110)  # this is a hack to figure out shape of fc layer
    net = conv1d_nn.Net(input_shape=input_shape, dropout=dropout)
    net.cuda()
    criterion = nn.CrossEntropyLoss()
    criterion.cuda()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    # Drop LR on a validation-loss plateau (patience of 100 epochs).
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min',
                                                     patience=100,
                                                     threshold=1e-3)
    num_epochs = 200
    for epoch in range(1, num_epochs + 1):
        train_loss, train_acc = train_epoch(training_loader, net, criterion,
                                            optimizer)
        validation_loss, validation_accuracy = val_epoch(
            validation_loader, net, criterion)
        scheduler.step(validation_loss)
        print(
            'EPOCH:: %i, train_loss: %.3f, test_loss: %.3f, train_acc: %.3f, test_acc: %.3f'
            % (epoch, train_loss, validation_loss, train_acc,
               validation_accuracy))
    return {
        'loss': -validation_accuracy,
        'status': STATUS_OK,
        'val_loss': validation_loss
    }