def main(args):
    """Train and/or evaluate a binary MLP fire classifier.

    Expects on ``args``:
      csv_path[0]     -- path to the dataset CSV read by FireDataset
      weights_path[0] -- path where model weights are saved / loaded
      eval_only       -- when truthy, skip training and only run the test pass
    """
    input_size = 3
    output_size = 1

    # Make the training and testing set to be less bias
    fire_dataset = FireDataset(args.csv_path[0])

    # BUG FIX: derive the test length as the complement of the train length.
    # Rounding both fractions independently can overshoot the dataset size
    # (e.g. len == 5 gives round(3.5) + round(1.5) == 6), which makes
    # random_split raise a ValueError on the length mismatch.
    n_train = round(0.7 * len(fire_dataset))
    fire_train, fire_test = random_split(
        fire_dataset, (n_train, len(fire_dataset) - n_train))

    trainloader = DataLoader(fire_train, batch_size=4096, shuffle=True,
                             num_workers=2)
    testloader = DataLoader(fire_test, batch_size=512, shuffle=False,
                            num_workers=2)

    save_weights_pth = args.weights_path[0]
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = MLP(input_size=input_size, output_size=output_size)
    model.to(device)

    # NOTE(review): BCELoss expects probabilities in [0, 1] -- presumably the
    # MLP ends in a sigmoid; confirm against the MLP definition.
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=5e-7)
    epochs = 30

    if args.eval_only:
        do_test(model, device, testloader, save_weights_pth)
    else:
        do_train(model, device, trainloader, criterion, optimizer, epochs,
                 save_weights_pth)
        do_test(model, device, testloader, save_weights_pth)
def main():
    """Policy-gradient search over synthetic-image attributes.

    Repeatedly samples an attribute vector from the policy (``main_model``),
    renders synthetic training images for it in a Unity environment, trains a
    fresh copy of the task model on those images, and uses the resulting
    validation score as the REINFORCE reward with a moving baseline.
    """
    # get unity environment
    env, brain = get_unity_envs()

    # get arguments
    args = get_arguments()
    print(args)

    # set gpu environment
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    cudnn.enabled = True
    cudnn.benchmark = True
    cuda = torch.cuda.is_available()

    # set random seed
    rn = set_seeds(args.random_seed, cuda)

    # make directory
    os.makedirs(args.snapshot_dir, exist_ok=True)

    # get validation dataset (real images used to compute the reward)
    val_set = get_validation_dataset(args)
    print("len of test set: ", len(val_set))
    val_loader = data.DataLoader(val_set, batch_size=args.real_batch_size,
                                 shuffle=False, num_workers=args.num_workers,
                                 pin_memory=True)

    # generate training list: every image index except multiples of 10
    # (those are presumably held out -- confirm against the dataset loader)
    with open(args.syn_list_path, "w") as fp:
        for i in range(args.syn_img_num):
            if i % 10 != 0:
                fp.write(str(i + 1) + '\n')

    # get main model (the policy over attribute vectors)
    main_model = MLP(args.num_inputs, args.num_outputs, args.hidden_size)
    if args.resume != "":
        main_model.load_state_dict(torch.load(args.resume))

    # get task model
    if args.task_model_name == "FCN8s":
        task_model = FCN8s_sourceonly(n_class=args.num_classes)
        vgg16 = VGG16(pretrained=True)
        task_model.copy_params_from_vgg16(vgg16)
    else:
        # NOTE(review): message reports the supported name, not the rejected
        # one -- consider including args.task_model_name instead.
        raise ValueError("Specified model name: FCN8s")

    # save initial task model so every step can restart from identical weights
    torch.save(task_model.state_dict(),
               os.path.join(args.snapshot_dir, "task_model_init.pth"))

    if cuda:
        main_model = main_model.cuda()
        task_model = task_model.cuda()

    # get optimizer
    main_optimizer = optim.Adam(main_model.parameters(), lr=args.main_lr)
    task_optimizer = optim.SGD(task_model.parameters(), lr=args.task_lr,
                               momentum=0.9, weight_decay=1e-4)

    frame_idx = 0
    whole_start_time = time.time()
    while frame_idx < args.max_frames:
        log_probs = []
        rewards = []
        start_time = time.time()

        for i_step in range(1, args.step_each_frame + 1):
            # get initial attribute list (random state fed to the policy)
            state = np.random.rand(1, args.num_inputs)
            state = torch.from_numpy(state).float()
            if cuda:
                state = state.cuda()

            # get modified attribute list: the policy returns a distribution
            dist = main_model(state)
            action = dist.sample()
            action_actual = action.float() / 10.0  # [0, 0.9]

            # generate images by attribute list
            print("action: " + str(action_actual.cpu().numpy()))
            get_images_by_attributes(args, i_step, env, brain,
                                     action_actual[0].cpu().numpy())
            train_set = get_training_dataset(args, i_step)
            train_loader = data.DataLoader(train_set,
                                           batch_size=args.syn_batch_size,
                                           shuffle=True,
                                           num_workers=args.num_workers,
                                           pin_memory=True)

            # train the task model using synthetic dataset, always starting
            # from the saved initial weights so rewards are comparable
            task_model.load_state_dict(
                torch.load(
                    os.path.join(args.snapshot_dir, "task_model_init.pth")))
            reward = train_task_model(train_loader, val_loader, task_model,
                                      task_optimizer, args, cuda)

            log_prob = dist.log_prob(action)[0]
            log_probs.append(log_prob)
            rewards.append(torch.FloatTensor([reward]))
            frame_idx += 1

        # NOTE(review): moving_start is only initialized when frame_idx == 1
        # after the inner loop, i.e. when step_each_frame == 1; with more
        # steps per frame the first compute_returns call would see an
        # undefined moving_start -- confirm intended configuration.
        if frame_idx == 1:
            moving_start = torch.FloatTensor([reward])
        baseline = compute_returns(rewards, moving_start)
        moving_start = baseline[-1]

        log_probs = torch.cat(log_probs)
        baseline = torch.cat(baseline).detach()
        rewards = torch.cat(rewards).detach()

        # REINFORCE with baseline: maximize reward => minimize -logp * adv
        advantage = rewards - baseline
        if cuda:
            advantage = advantage.cuda()
        loss = -(log_probs * advantage.detach()).mean()

        with open(os.path.join(args.snapshot_dir, "logs.txt"), 'a') as fp:
            fp.write(
                "frame idx: {0:4d}, state: {1:s}, action: {2:s}, reward: {3:s}, baseline: {4:s}, loss: {5:.2f} \n"
                .format(frame_idx, str(state.cpu()[0].numpy()),
                        str(action.cpu()[0].numpy()), str(rewards.numpy()),
                        str(baseline.numpy()), loss.item()))

        print("optimize the main model parameters")
        main_optimizer.zero_grad()
        loss.backward()
        main_optimizer.step()

        elapsed_time = time.time() - start_time
        print("[frame: {0:3d}], [loss: {1:.2f}], [time: {2:.1f}]".format(
            frame_idx, loss.item(), elapsed_time))

        # snapshot the policy after every frame
        torch.save(
            main_model.state_dict(),
            os.path.join(args.snapshot_dir, "main_model_%d.pth" % frame_idx))

    elapsed_time = time.time() - whole_start_time
    print("whole time: {0:.1f}".format(elapsed_time))
    env.close()
def train_model(data, model_name="mlp", log_interval=10, loss="mse",
                optim="adam", store=False, visual=False, verbose=False,
                log=False):
    """Train one of {mlp, gcn, gat} on ``data`` with early stopping.

    Args:
        data: object exposing ``X`` (feature matrix); consumed by the
            ``_train_epoch`` / ``_evaluate_epoch`` helpers.
        model_name: which architecture to build ("mlp", "gcn", "gat").
        log_interval: evaluate/checkpoint every this many epochs.
        loss: training-loss selector ("mse" or "nll").
        optim: optimizer selector ("adam" or "adagrad").
        store: create/restore checkpoints under checkpoints/<model_name>.
        visual: draw a live training plot.
        verbose, log: console logging switches.

    Returns:
        The test-loss entry (stats[idx][2]) recorded nearest the best
        validation loss seen during training.
    """
    if verbose and log:
        print("\nTraining model", model_name)

    if store:
        # Create directory
        checkpoint_path = os.path.join("checkpoints/", model_name)
        if not os.path.isdir(checkpoint_path):
            try:
                os.makedirs(checkpoint_path)
            except OSError:
                sys.exit("Creation of checkpoint directory failed.")

    # Model -- unknown names fall through with model = None and fail later
    model = None
    if model_name == "mlp":
        model = MLP(
            num_features=data.X.shape[1],
            hidden_size=config("{}.hidden_layer".format(model_name)),
        )
    elif model_name == "gcn":
        model = GCN(
            num_features=data.X.shape[1],
            hidden_size=config("{}.hidden_layer".format(model_name)),
        )
    elif model_name == "gat":
        model = GAT(
            num_features=data.X.shape[1],
            hidden_size=config("{}.hidden_layer".format(model_name)),
        )

    # Criterion and Loss Function
    # NOTE(review): evaluation always uses MSE regardless of the ``loss``
    # argument; only the training loss (loss_fn) is selectable -- confirm
    # this asymmetry is intentional.
    criterion = torch.nn.MSELoss()
    loss_fn = None
    if loss == "mse":
        loss_fn = torch.nn.MSELoss()
    elif loss == "nll":
        loss_fn = NLLLoss()

    # Optimizer
    optimizer = None
    if optim == "adam":
        optimizer = Adam(model.parameters(),
                         lr=config('{}.learning_rate'.format(model_name)))
    elif optim == "adagrad":
        optimizer = Adagrad(model.parameters(),
                            lr=config('{}.learning_rate'.format(model_name)))

    # Setup training
    fig, axes = None, None
    if visual:
        fig, axes = make_training_plot(model_name)

    start_epoch = 0
    stats = []
    if store:
        # Attempts to restore the latest checkpoint if exists
        print('Loading {}...'.format(model_name))
        model, start_epoch, stats = restore_checkpoint(
            model, config('{}.checkpoint'.format(model_name)))

    # Evaluate the randomly initialized model
    _evaluate_epoch(axes, data, model, criterion, start_epoch, stats,
                    log_interval, log)

    # Loop over the entire dataset multiple times
    patience = config("patience")
    best_loss = float('inf')
    idx = -1  # epoch index of the best validation loss so far
    for epoch in range(start_epoch,
                       config('{}.num_epochs'.format(model_name))):
        # Early stop
        if patience < 0:
            break

        # Train model
        _train_epoch(data, model, loss_fn, optimizer)

        # Evaluate model
        if (epoch + 1) % log_interval == 0:
            _evaluate_epoch(axes, data, model, criterion, epoch + 1, stats,
                            log_interval, log)
            if store:
                # Save model parameters
                save_checkpoint(model, epoch + 1,
                                config('{}.checkpoint'.format(model_name)),
                                stats)

        # NOTE(review): stats only grows every log_interval epochs, so in
        # between this re-reads the same validation loss; patience therefore
        # counts epochs, not evaluations -- confirm that is the intent.
        valid_loss = stats[-1][0]
        if valid_loss < best_loss:
            patience = config("patience")
            best_loss = valid_loss
            idx = epoch
        patience -= 1

    # Map the best epoch back to its row in stats (one row per evaluation,
    # clamped to the last row).
    epoch = idx
    idx = min(int((idx + 1) / log_interval), len(stats) - 1)
    if verbose:
        print("The loss on test dataset is:", stats[idx][2],
              "obtained in epoch", epoch)

    # Save figure and keep plot open
    if visual:
        save_training_plot(fig, model_name)
        hold_training_plot()

    return stats[idx][2]
# build network
mnist_mlp = MLP(io_bits=io_bits)
mnist_mlp.cuda()

# Resume from an existing checkpoint when available, otherwise start fresh.
# BUG FIX: catch only the errors torch.load / load_state_dict raise for a
# missing or incompatible file instead of a bare `except:`, which also
# swallowed KeyboardInterrupt and SystemExit.
try:
    data = torch.load('mlp_mnist_nobias.t7')
    mnist_mlp.load_state_dict(data)
    print('Loading weight')
except (FileNotFoundError, RuntimeError):
    print('Initializing model')

# ##
# NOTE(review): size_average is deprecated in modern PyTorch;
# nn.CrossEntropyLoss(reduction='sum') is the equivalent spelling. Kept
# as-is to match the torch version this project pins.
criterian = nn.CrossEntropyLoss(size_average=False)
optimizer = optim.SGD(mnist_mlp.parameters(), lr=learning_rate)

best_acc = 0
for i in range(epoches):
    # training
    running_acc = 0.
    for (img, label) in trainloader:
        img = torch.autograd.Variable(img).cuda()
        label = torch.autograd.Variable(label).cuda()

        # Quantize the weights before the forward pass; presumably
        # `save_param` keeps the full-precision copy and `st` the step
        # size used for the noise injection -- confirm in quantize_weight.
        save_param, st = quantize_weight(mnist_mlp, w_bits, cells)
        noise(mnist_mlp, st)

        optimizer.zero_grad()
        output = mnist_mlp(img)
        loss = criterian(output, label)
        loss.backward()
        optimizer.step()
def main(args):
    """Train a 3-class MLP on the HLB dataset, then save weights and plots.

    Expects on ``args``:
      dataset_path[0] -- path read by HLBDataset
      weights_path[0] -- path where model weights are saved
    """
    input_size = 5
    output_size = 3
    epochs = 100

    # Make the training and testing set to be less bias
    hlb_dataset = HLBDataset(args.dataset_path[0])
    # Compute the test length as the complement so the two split sizes are
    # guaranteed to sum to len(hlb_dataset); rounding both fractions
    # independently is fragile.
    n_train = round(0.8 * len(hlb_dataset))
    hlb_train, hlb_test = random_split(
        hlb_dataset, (n_train, len(hlb_dataset) - n_train))
    print(args.weights_path[0])
    print(f'Number of training examples: {len(hlb_train)}')
    print(f'Number of testing examples: {len(hlb_test)}')

    trainloader = DataLoader(hlb_train, batch_size=2048, shuffle=True,
                             num_workers=2)
    testloader = DataLoader(hlb_test, batch_size=1024, shuffle=False,
                            num_workers=2)
    save_weights_pth = args.weights_path[0]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = MLP(input_size=input_size, output_size=output_size)
    print(model)
    model.to(device)

    # Per-class weights to counter class imbalance.
    weights = torch.tensor([0.85, 0.25, 1.0])
    # BUG FIX: move the class weights to the selected device instead of
    # calling .cuda() unconditionally, which crashed on CPU-only machines
    # even though `device` falls back to "cpu".
    criterion = nn.CrossEntropyLoss(weight=weights.to(device))

    learning_rate = 1e-6
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    train_acc_array = []
    train_loss_array = []
    test_acc_array = []
    test_loss_array = []
    for i in range(epochs):  # loop over the dataset multiple times
        train_loss, train_acc = do_train(model, device, trainloader,
                                         criterion, optimizer)
        print('Epoch {} Train loss: {} Train acc: {}'.format(
            i, train_loss, train_acc))
        train_acc_array.append(train_acc)
        train_loss_array.append(train_loss)

        test_loss, test_acc = do_test(model, device, testloader, criterion)
        test_acc_array.append(test_acc)
        test_loss_array.append(test_loss)
        print('Test loss: {} Test acc: {}'.format(test_loss, test_acc))

        # NOTE(review): this also fires at epoch 0, decaying the lr before
        # any training; if the decay is meant to start at epoch 50, the
        # condition should exclude i == 0.
        if i % 50 == 0:
            learning_rate = learning_rate * 0.99
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate

    torch.save(model.state_dict(), args.weights_path[0])
    save_data(train_loss_array, train_acc_array, test_loss_array,
              test_acc_array)
    plot_graph(train_loss_array, train_acc_array, test_loss_array,
               test_acc_array)
def bench_rnn_backward(batch_size=512, num_batch=100, vocab_size=1024,
                       length=30, embed_size=128, hidden_size=128, delta=5):
    """Benchmark backward-pass time for RNNNative, RNNTorch, MLP and MLP2.

    Args:
        batch_size: examples per batch.
        num_batch: batches timed per model.
        vocab_size: size of the random token vocabulary.
        length: nominal sequence length.
        embed_size, hidden_size: model dimensions.
        delta: per-batch length jitter; each batch runs length + d steps
            with d drawn uniformly from [-delta, delta).
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # ---- RNNNative: explicit per-step unrolling -------------------------
    rnn = RNNNative(vocab_size, embed_size, hidden_size).to(device)
    delta = np.random.randint(-delta, delta, size=num_batch)
    input = torch.LongTensor(batch_size).random_(0, vocab_size).to(device)
    label = torch.LongTensor(batch_size).random_(0, vocab_size).to(device)
    criteria = nn.CrossEntropyLoss()
    optimizer = optim.SGD(rnn.parameters(), lr=0.01)

    time_elapsed = 0
    for i in range(num_batch):
        input = torch.LongTensor(batch_size).random_(0, vocab_size).to(device)
        hx = torch.randn(batch_size, hidden_size).to(device)
        loss = 0
        optimizer.zero_grad()
        start = time.time()
        for j in range(length + delta[i]):
            output, hx = rnn(input, hx)
            loss += criteria(output, label)
        loss.backward()
        optimizer.step()
        time_elapsed += time.time() - start
    print("Elapsed time for RNNNative backward {:.3f}, avg length: {:.3f}".format(
        time_elapsed, length + np.mean(delta)))

    # ---- RNNTorch: whole sequence in one call ---------------------------
    rnn = RNNTorch(vocab_size, embed_size, hidden_size).to(device)
    # BUG FIX: rebuild the optimizer for the new model. The original kept
    # the optimizer bound to the RNNNative parameters, so zero_grad()/step()
    # never touched the RNNTorch gradients being benchmarked (and its grads
    # accumulated across batches).
    optimizer = optim.SGD(rnn.parameters(), lr=0.01)
    input = torch.LongTensor(length, batch_size).random_(0, vocab_size).to(device)
    label = torch.LongTensor(length * batch_size).random_(0, vocab_size).to(device)
    start = time.time()
    for i in range(num_batch):
        loss = 0
        optimizer.zero_grad()
        output, _ = rnn(input)
        loss = criteria(output.view(length * batch_size, -1), label)
        loss.backward()
        optimizer.step()
    end = time.time()
    print("Elapsed time for RNNTorch backward {:.3f}, avg length: {:.3f}".format(
        end - start, length + np.mean(delta)))

    # ---- RNNTorch driven one step at a time -----------------------------
    input = torch.LongTensor(1, batch_size).random_(0, vocab_size).to(device)
    label = torch.LongTensor(batch_size).random_(0, vocab_size).to(device)
    hx = torch.randn(1, batch_size, hidden_size).to(device)
    time_elapsed = 0
    for i in range(num_batch):
        loss = 0
        input = torch.LongTensor(1, batch_size).random_(0, vocab_size).to(device)
        optimizer.zero_grad()
        start = time.time()
        for j in range(length + delta[i]):
            output, hx = rnn(input, hx)
            loss += criteria(output.view(batch_size, -1), label)
        loss.backward()
        optimizer.step()
        time_elapsed += time.time() - start
    print("Elapsed time for RNNTorch step backward {:.3f}, avg length: {:.3f}".format(
        time_elapsed, length + np.mean(delta)))

    # mlp
    label = torch.LongTensor(batch_size).random_(0, vocab_size).to(device)
    mlp = MLP(device, vocab_size, embed_size, hidden_size, length).to(device)
    input = []
    hx = []
    for i in range(length):
        input.append(torch.LongTensor(batch_size).random_(0, vocab_size).to(device))
        hx.append(torch.randn(batch_size, hidden_size).to(device))
    optimizer = optim.SGD(mlp.parameters(), lr=0.01)
    start = time.time()
    for i in range(num_batch):
        optimizer.zero_grad()
        output = mlp(input, hx)
        loss = 0
        # Renamed from `i` to avoid shadowing the batch index.
        for step in range(length):
            loss += criteria(output[step], label)
        # BUG FIX: the original wrote `loss.backward` without calling it, so
        # no backward pass ever ran and the benchmark timed forward only.
        loss.backward()
        optimizer.step()
    end = time.time()
    print("Elapsed time for MLP backward {:.3f}".format(end - start))

    # mlp2
    mlp2 = MLP2(device, vocab_size, embed_size, hidden_size, length).to(device)
    input = torch.LongTensor(batch_size).random_(0, vocab_size).to(device)
    hx = torch.randn(batch_size, hidden_size).to(device)
    optimizer = optim.SGD(mlp2.parameters(), lr=0.01)
    start = time.time()
    for i in range(num_batch):
        optimizer.zero_grad()
        loss = 0
        for step in range(length):
            output = mlp2(input, hx)
            loss += criteria(output, label)
        # BUG FIX: same missing call as above -- backward() now executes.
        loss.backward()
        optimizer.step()
    end = time.time()
    print("Elapsed time for MLP2 backward {:.3f}".format(end - start))