def driver():
    dataset = build()
    delaylist = [
        'ArrDelay', 'DepDelay', 'CarrierDelay', 'WeatherDelay', 'NASDelay',
        'SecurityDelay', 'LateAircraftDelay'
    ]
    #plotStats(dataset, plotlist1, 'SFO')
    #print(dataset.columns.tolist())
    dataset = dataset.reset_index()
    # fillna returns a new frame, so the result must be assigned back
    dataset = dataset.fillna(0)

    # Converting categorical features to numerics
    dataset["Dest"] = dataset["Dest"].astype('category')
    dataset["Dest"] = dataset["Dest"].cat.codes
    #dataset = dataset.sample(n=20000)
    dataset['Date'] = dataset['Date'].apply(lambda x: x.timestamp())

    dataSFO = dataset.loc[dataset['Origin'].isin(['SFO'])]
    dataOAK = dataset.loc[dataset['Origin'].isin(['OAK'])]
    dataSFO = dataSFO.iloc[0:10000]
    dataOAK = dataOAK.iloc[0:10000]
    frames = [dataSFO, dataOAK]
    NNdata = pd.concat(frames)
    #NNdata = NNdata.sample(n=20000)
    labels = NNdata["Origin"]
    NNdata.drop('Origin', axis=1, inplace=True)

    delayset = dataset[delaylist]
    c1 = dataset.DayOfWeek.unique()
    #labels = dataset["Origin"]

    le = LabelEncoder()
    labels = le.fit_transform(labels)
    labels = np_utils.to_categorical(labels, 2)
    data = NNdata

    x_train, x_test, y_train, y_test = train_test_split(data, labels,
                                                        train_size=0.8)
    FeedForward(x_train, x_test, y_train, y_test, len(NNdata.dtypes))
def main():
    parser = argparse.ArgumentParser(
        'Train a simple classifier on a toy dataset')
    parser.add_argument('--dataset', type=str, default='')
    parser.add_argument('--train-fraction', type=float, default=.5,
                        help='proportion of the dataset to use for training')
    parser.add_argument('--n-samples', type=int, default=10000)
    parser.add_argument('--hidden-size', type=int, default=512,
                        help='Hidden size of the cleanup network')
    parser.add_argument('--epochs', type=int, default=20)
    parser.add_argument('--batch-size', type=int, default=32)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--momentum', type=float, default=0.9)
    parser.add_argument('--seed', type=int, default=13)
    parser.add_argument('--logdir', type=str,
                        default='trained_models/simple_classifier',
                        help='Directory for saved model and tensorboard log')
    parser.add_argument('--load-model', type=str, default='',
                        help='Optional model to continue training from')
    parser.add_argument('--name', type=str, default='',
                        help='Name of output folder within logdir. '
                             'Will use current date and time if blank')
    parser.add_argument('--weight-histogram', action='store_true',
                        help='Save histograms of the weights if set')

    args = parser.parse_args()

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    rng = np.random.RandomState(seed=args.seed)

    dataset_train = ToyDataset(args.n_samples)
    dataset_test = ToyDataset(args.n_samples)

    trainloader = torch.utils.data.DataLoader(
        dataset_train, batch_size=args.batch_size, shuffle=True, num_workers=0,
    )

    # For testing just do everything in one giant batch
    testloader = torch.utils.data.DataLoader(
        dataset_test, batch_size=len(dataset_test), shuffle=False, num_workers=0,
    )

    model = FeedForward(input_size=2, hidden_size=args.hidden_size, output_size=4)

    # Open a tensorboard writer if a logging directory is given
    if args.logdir != '':
        current_time = datetime.now().strftime('%b%d_%H-%M-%S')
        save_dir = osp.join(args.logdir, current_time)
        writer = SummaryWriter(log_dir=save_dir)
        if args.weight_histogram:
            # Log the initial parameters
            for name, param in model.named_parameters():
                writer.add_histogram('parameters/' + name,
                                     param.clone().cpu().data.numpy(), 0)

    criterion = nn.CrossEntropyLoss()
    # criterion = nn.NLLLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                momentum=args.momentum)

    for e in range(args.epochs):
        print('Epoch: {0}'.format(e + 1))

        avg_loss = 0
        n_batches = 0
        for i, data in enumerate(trainloader):
            locations, labels = data

            if locations.size()[0] != args.batch_size:
                continue  # Drop data, not enough for a batch

            optimizer.zero_grad()

            # outputs = torch.max(model(locations), 1)[1].unsqueeze(1)
            outputs = model(locations)

            loss = criterion(outputs, labels)
            avg_loss += loss.data.item()
            n_batches += 1

            loss.backward()
            # print(loss.data.item())

            optimizer.step()

        print(avg_loss / n_batches)

        if args.logdir != '':
            if n_batches > 0:
                avg_loss /= n_batches
                writer.add_scalar('avg_loss', avg_loss, e + 1)

            if args.weight_histogram and (e + 1) % 10 == 0:
                for name, param in model.named_parameters():
                    writer.add_histogram('parameters/' + name,
                                         param.clone().cpu().data.numpy(), e + 1)

    print("Testing")
    with torch.no_grad():
        # Everything is in one batch, so this loop will only happen once
        for i, data in enumerate(testloader):
            locations, labels = data
            outputs = model(locations)
            loss = criterion(outputs, labels)
            print(loss.data.item())

            if args.logdir != '':
                # TODO: get a visualization of the performance
                writer.add_scalar('test_loss', loss.data.item())

    # Close tensorboard writer
    if args.logdir != '':
        writer.close()

        torch.save(model.state_dict(), osp.join(save_dir, 'model.pt'))

        params = vars(args)
        with open(osp.join(save_dir, "params.json"), "w") as f:
            json.dump(params, f)
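# NOTE: The FeedForward class itself is not shown in this section; the scripts here
# only call it as FeedForward(input_size=..., hidden_size=..., output_size=...).
# Below is a minimal sketch of what such a module could look like (one hidden layer
# with ReLU, hidden_size defaulting to 512). This is an illustrative assumption,
# not the repository's actual definition.

import torch
import torch.nn as nn


class FeedForward(nn.Module):
    """Sketch of a two-layer MLP matching the constructor calls used in this section."""

    def __init__(self, input_size=2, hidden_size=512, output_size=2):
        super(FeedForward, self).__init__()
        self.input_layer = nn.Linear(input_size, hidden_size)
        self.output_layer = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # Hidden layer with ReLU; raw logits are returned so that
        # nn.CrossEntropyLoss (as in main() above) can be applied directly.
        return self.output_layer(torch.relu(self.input_layer(x)))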
trainloader = torch.utils.data.DataLoader(
    dataset_train, batch_size=args.batch_size, shuffle=True, num_workers=0,
)

# For testing just do everything in one giant batch
testloader = torch.utils.data.DataLoader(
    dataset_test, batch_size=len(dataset_test), shuffle=False, num_workers=0,
)

model = FeedForward(input_size=train_inputs.shape[1],
                    output_size=train_outputs.shape[1])

if args.load_saved_model:
    model.load_state_dict(torch.load(args.load_saved_model), strict=False)

# Open a tensorboard writer if a logging directory is given
if args.logdir != '':
    current_time = datetime.now().strftime('%b%d_%H-%M-%S')
    save_dir = os.path.join(args.logdir, current_time)
    writer = SummaryWriter(log_dir=save_dir)

    if args.weight_histogram:
        # Log the initial parameters
        for name, param in model.named_parameters():
            writer.add_histogram('parameters/' + name,
                                 param.clone().cpu().data.numpy(), 0)
def train(args):
    # setup metric logging. It's important to log your loss!!
    log_f = open(args.log_file, 'w')
    fieldnames = ['step', 'train_loss', 'train_acc', 'dev_loss', 'dev_acc']
    logger = csv.DictWriter(log_f, fieldnames)
    logger.writeheader()

    # load data
    train_data, train_labels = load(args.data_dir, split="train")
    if args.model.lower() == "best":
        try:
            train_data = pickle.load(open('./obj/train_data.pkl', 'rb'))
            train_labels = pickle.load(open('./obj/train_label.pkl', 'rb'))
            print('loaded transformed data.')
        except FileNotFoundError:
            transforms.double_batch(train_data.astype(np.float32),
                                    train_labels.astype(np.int))
    dev_data, dev_labels = load(args.data_dir, split="dev")

    # Build model
    if args.model.lower() == "simple-ff":
        model = FeedForward(args.ff_hunits)
    elif args.model.lower() == "simple-cnn":
        model = SimpleConvNN(args.cnn_n1_channels,
                             args.cnn_n1_kernel,
                             args.cnn_n2_kernel)
    elif args.model.lower() == "best":
        # TODO: Feel free to change the initialization arguments here to take
        # whatever parameters you need.
        print("training model:: channel_init- " + str(args.channel_size) +
              " compression_ratio- " + str(args.compression_ratio) +
              " initial kernel- " + str(args.initial_kernel) +
              " interior kernel- " + str(args.interior_kernel) +
              ' first hidden- ' + str(args.hidden))
        model = BestNN(args.hidden, args.channel_size, args.final_channel_size,
                       args.compression_ratio, args.initial_kernel,
                       args.interior_kernel)
    else:
        raise Exception("Unknown model type passed in!")

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    # TODO: You can change this loop as you need to, to optimize your training!
    # For example, if you wanted to implement early stopping to make sure you
    # don't overfit your model, you would do so in this loop.
    for step in range(args.train_steps):
        # run the model and backprop for train steps
        i = np.random.choice(train_data.shape[0], size=args.batch_size,
                             replace=False)
        x = torch.from_numpy(train_data[i].astype(np.float32))
        y = torch.from_numpy(train_labels[i].astype(np.int))

        # Forward pass: Get logits for x
        logits = model(x)
        # Compute loss
        loss = F.cross_entropy(logits, y)
        if step % 25 == 0:
            print(str(step) + " :Loss (cross entropy): " + str(loss.item()))
        # Zero gradients, perform a backward pass, and update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # every 50 steps (and at step 0), log metrics
        if (step + 1) % 50 == 0 or step == 0:
            train_acc, train_loss = approx_train_acc_and_loss(model,
                                                              train_data,
                                                              train_labels)
            dev_acc, dev_loss = dev_acc_and_loss(model, dev_data, dev_labels)
            step_metrics = {
                'step': step,
                'train_loss': loss.item(),
                'train_acc': train_acc,
                'dev_loss': dev_loss,
                'dev_acc': dev_acc
            }
            print(f'On step {step}: Train loss {train_loss} | Dev acc is {dev_acc}')
            logger.writerow(step_metrics)
            if dev_acc > .92:
                print(f'On step {step}: Train loss {train_loss} | Dev acc is {dev_acc}')
                logger.writerow(step_metrics)
                print(f'Done training. Saving model at {args.model_save}')
                torch.save(model, args.model_save)
                break

    # close the log file
    log_f.close()
    # save model
    print(f'Done training. Saving model at {args.model_save}')
    torch.save(model, args.model_save)
def train(args):
    # setup metric logging. It's important to log your loss!!
    log_f = open(args.log_file, 'w')
    fieldnames = ['step', 'train_loss', 'train_acc', 'dev_loss', 'dev_acc']
    logger = csv.DictWriter(log_f, fieldnames)
    logger.writeheader()

    # load data
    train_data, train_labels = load(args.data_dir, split="train")
    dev_data, dev_labels = load(args.data_dir, split="dev")

    # Build model
    if args.model.lower() == "simple-ff":
        model = FeedForward(args.ff_hunits)
    elif args.model.lower() == "simple-cnn":
        model = SimpleConvNN(args.cnn_n1_channels,
                             args.cnn_n1_kernel,
                             args.cnn_n2_kernel)
    elif args.model.lower() == "best":
        # TODO: Feel free to change the initialization arguments here to take
        # whatever parameters you need.
        model = BestNN(args.cnn_n1_channels,
                       args.cnn_n1_kernel,
                       args.cnn_n2_kernel,
                       args.linear_size,
                       args.dropout)
    else:
        raise Exception("Unknown model type passed in!")

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    # TODO: You can change this loop as you need to, to optimize your training!
    # For example, if you wanted to implement early stopping to make sure you
    # don't overfit your model, you would do so in this loop.
    for step in range(args.train_steps):
        # run the model and backprop for train steps
        i = np.random.choice(train_data.shape[0], size=args.batch_size,
                             replace=False)
        x = torch.from_numpy(train_data[i].astype(np.float32))
        y = torch.from_numpy(train_labels[i].astype(np.int))
        y = y.long()

        # Forward pass: Get logits for x
        logits = model(x)
        # Compute loss
        loss = F.cross_entropy(logits, y)
        # Zero gradients, perform a backward pass, and update the weights.
        if args.model.lower() == "best":
            # A fresh Adam optimizer with a step-decayed learning rate is built
            # each step for the "best" model; note that the update below still
            # goes through the original `optimizer`.
            optimizer_best = torch.optim.Adam(
                model.parameters(),
                lr=args.learning_rate * (0.5 ** (step // 1000)))
            optimizer_best.zero_grad()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # every 100 steps, log metrics
        if step % 100 == 0:
            train_acc, train_loss = approx_train_acc_and_loss(model,
                                                              train_data,
                                                              train_labels)
            dev_acc, dev_loss = dev_acc_and_loss(model, dev_data, dev_labels)
            step_metrics = {
                'step': step,
                'train_loss': loss.item(),
                'train_acc': train_acc,
                'dev_loss': dev_loss,
                'dev_acc': dev_acc
            }
            print(f'On step {step}: Train loss {train_loss} | Dev acc is {dev_acc}')
            logger.writerow(step_metrics)

    # close the log file
    log_f.close()
    # save model
    print(f'Done training. Saving model at {args.model_save}')
    torch.save(model, args.model_save)
def main():
    if not os.path.isdir(CHECKPOINT):
        os.makedirs(CHECKPOINT)

    print('==> Preparing dataset')

    trainloader, validloader, testloader = load_MNIST(batch_size=BATCH_SIZE,
                                                      num_workers=NUM_WORKERS)

    CLASSES = []
    AUROCs = []
    auroc = AverageMeter()

    for t, cls in enumerate(ALL_CLASSES):

        print('\nTask: [%d | %d]\n' % (t + 1, len(ALL_CLASSES)))

        CLASSES = [cls]

        print("==> Creating model")
        model = FeedForward(num_classes=1)

        if CUDA:
            model = model.cuda()
            model = nn.DataParallel(model)
            cudnn.benchmark = True

        print('    Total params: %.2fK' %
              (sum(p.numel() for p in model.parameters()) / 1000))

        criterion = nn.BCELoss()
        optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE,
                              momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)

        print("==> Learning")

        best_loss = 1e10
        learning_rate = LEARNING_RATE

        for epoch in range(EPOCHS):

            # decay learning rate
            if (epoch + 1) % EPOCHS_DROP == 0:
                learning_rate *= LR_DROP
                for param_group in optimizer.param_groups:
                    param_group['lr'] = learning_rate

            print('Epoch: [%d | %d]' % (epoch + 1, EPOCHS))

            train_loss = train(trainloader, model, criterion, CLASSES, CLASSES,
                               optimizer=optimizer, use_cuda=CUDA)
            test_loss = train(validloader, model, criterion, CLASSES, CLASSES,
                              test=True, use_cuda=CUDA)

            # save model
            is_best = test_loss < best_loss
            best_loss = min(test_loss, best_loss)
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'loss': test_loss,
                'optimizer': optimizer.state_dict()
            }, CHECKPOINT, is_best)

        print("==> Calculating AUROC")

        filepath_best = os.path.join(CHECKPOINT, "best.pt")
        checkpoint = torch.load(filepath_best)
        model.load_state_dict(checkpoint['state_dict'])

        new_auroc = calc_avg_AUROC(model, testloader, CLASSES, CLASSES, CUDA)
        auroc.update(new_auroc)

        print('New Task AUROC: {}'.format(new_auroc))
        print('Average AUROC: {}'.format(auroc.avg))

        AUROCs.append(auroc.avg)

    print('\nAverage Per-task Performance over number of tasks')
    for i, p in enumerate(AUROCs):
        print("%d: %f" % (i + 1, p))
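# NOTE: AverageMeter is used above to track the running mean AUROC but its
# implementation is not included in this section. A common minimal version is
# sketched below; this is an assumption, not necessarily the class used here.

class AverageMeter(object):
    """Tracks the latest value, running sum, count, and average."""

    def __init__(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        # 'val' is the newest measurement, 'n' how many samples it represents
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count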
def train(args):
    # setup metric logging
    log_f = open(args.log_file, 'w')
    fieldnames = ['step', 'train_loss', 'train_acc', 'dev_loss', 'dev_acc']
    logger = csv.DictWriter(log_f, fieldnames)
    logger.writeheader()

    # load data
    train_data, train_labels = load(args.data_dir, split="train")
    dev_data, dev_labels = load(args.data_dir, split="dev")

    # Build model
    if args.model.lower() == "simple-ff":
        model = FeedForward(args.ff_hunits)
    elif args.model.lower() == "simple-cnn":
        model = SimpleConvNN(args.cnn_n1_channels,
                             args.cnn_n1_kernel,
                             args.cnn_n2_kernel)
    elif args.model.lower() == "best":
        model = BestNN(args.best_n1_channels,
                       args.best_n2_channels,
                       args.best_n1_kernel,
                       args.best_n2_kernel,
                       args.best_lin1_trans,
                       args.best_lin2_trans)
    else:
        raise Exception("Unknown model type passed in!")

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    for step in range(args.train_steps):
        # run the model and backprop for train steps
        i = np.random.choice(train_data.shape[0], size=args.batch_size,
                             replace=False)
        x = torch.from_numpy(train_data[i].astype(np.float32))
        y = torch.from_numpy(train_labels[i].astype(np.int))

        # Forward pass: Get logits for x
        logits = model(x)
        # Compute loss
        loss = F.cross_entropy(logits, y)
        # Zero gradients, perform a backward pass, and update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # every 100 steps, log metrics
        if step % 100 == 0:
            train_acc, train_loss = approx_train_acc_and_loss(model,
                                                              train_data,
                                                              train_labels)
            dev_acc, dev_loss = dev_acc_and_loss(model, dev_data, dev_labels)
            step_metrics = {
                'step': step,
                'train_loss': loss.item(),
                'train_acc': train_acc,
                'dev_loss': dev_loss,
                'dev_acc': dev_acc
            }
            print(f'On step {step}: Train loss {train_loss} | Dev acc is {dev_acc}')
            logger.writerow(step_metrics)

    # close the log file
    log_f.close()
    # save model
    print(f'Done training. Saving model at {args.model_save}')
    torch.save(model, args.model_save)
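# NOTE: The train() functions in this section rely on approx_train_acc_and_loss()
# and dev_acc_and_loss(), which are not shown. Below is a sketch of what
# dev_acc_and_loss() could look like, inferred only from how it is called
# (returns accuracy first, then loss); approx_train_acc_and_loss() presumably
# does the same on a random subsample of the training split. This is an
# illustrative assumption, not the actual helper.

import numpy as np
import torch
import torch.nn.functional as F


def dev_acc_and_loss(model, dev_data, dev_labels):
    """Hypothetical helper: accuracy and cross-entropy loss on the dev split."""
    model.eval()
    with torch.no_grad():
        x = torch.from_numpy(dev_data.astype(np.float32))
        y = torch.from_numpy(dev_labels.astype(np.int64))
        logits = model(x)
        loss = F.cross_entropy(logits, y)
        preds = logits.argmax(dim=1)
        acc = (preds == y).float().mean().item()
    model.train()
    return acc, loss.item()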
def train(args):
    np.random.seed(42)
    torch.manual_seed(42)

    # setup metric logging. It's important to log your loss!!
    log_f = open(args.log_file, 'w')
    fieldnames = ['step', 'train_loss', 'train_acc', 'dev_loss', 'dev_acc']
    logger = csv.DictWriter(log_f, fieldnames)
    logger.writeheader()

    # load data
    train_data, train_labels = load(args.data_dir, split="train")
    dev_data, dev_labels = load(args.data_dir, split="dev")

    # Build model
    if args.model.lower() == "simple-ff":
        model = FeedForward(args.ff_hunits)
    elif args.model.lower() == "simple-cnn":
        model = SimpleConvNN(args.cnn_n1_channels,
                             args.cnn_n1_kernel,
                             args.cnn_n2_kernel)
    elif args.model.lower() == "best":
        model = BestNN(args.best_n1_channels,
                       args.best_n2_channels,
                       args.best_n3_channels)
    else:
        raise Exception("Unknown model type passed in!")

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    if not args.train_my_way:
        for step in range(args.train_steps):
            # run the model and backprop for train steps
            i = np.random.choice(train_data.shape[0], size=args.batch_size,
                                 replace=False)
            x = torch.from_numpy(train_data[i].astype(np.float32))
            y = torch.from_numpy(train_labels[i].astype(np.int))

            # Forward pass: Get logits for x
            logits = model(x)
            # Compute loss
            loss = F.cross_entropy(logits, y)
            # Zero gradients, perform a backward pass, and update the weights.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # every 100 steps, log metrics
            if step % 100 == 0:
                train_acc, train_loss = approx_train_acc_and_loss(model,
                                                                  train_data,
                                                                  train_labels)
                dev_acc, dev_loss = dev_acc_and_loss(model, dev_data, dev_labels)
                step_metrics = {
                    'step': step,
                    'train_loss': loss.item(),
                    'train_acc': train_acc,
                    'dev_loss': dev_loss,
                    'dev_acc': dev_acc
                }
                print(f'On step {step}: Train loss {train_loss} | Dev acc is {dev_acc}')
                logger.writerow(step_metrics)

        # close the log file
        log_f.close()
        # save model
        print(f'Done training. Saving model at {args.model_save}')
        torch.save(model, args.model_save)

    else:
        '''
        MY OPTIMIZATION SCHEME
        Three conditions decide whether to continue training:
        1. Always train for at least 'min_iter' steps, and no more than 'max_iter'
        2. If dev acc drops by 'stepwise_cushion' or more between measured
           points (every 100 steps), stop training
        3. If dev acc has improved by less than 'timesaver_cushion' over the
           past 1000 iterations, stop training
        '''
        # Set up improving
        last_acc = 0
        improving = True
        # Set up got_time
        last1000 = 0
        got_time = True

        step = 0
        while step <= args.max_iter and (step <= args.min_iter or
                                         (improving and got_time)):
            # run the model and backprop for train steps
            i = np.random.choice(train_data.shape[0], size=args.batch_size,
                                 replace=False)
            x = torch.from_numpy(train_data[i].astype(np.float32))
            y = torch.from_numpy(train_labels[i].astype(np.int))

            # Forward pass: Get logits for x
            logits = model(x)
            # Compute loss
            loss = F.cross_entropy(logits, y)
            # Zero gradients, perform a backward pass, and update the weights.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # every 100 steps, log metrics
            if step % 100 == 0:
                train_acc, train_loss = approx_train_acc_and_loss(model,
                                                                  train_data,
                                                                  train_labels)
                dev_acc, dev_loss = dev_acc_and_loss(model, dev_data, dev_labels)
                step_metrics = {
                    'step': step,
                    'train_loss': loss.item(),
                    'train_acc': train_acc,
                    'dev_loss': dev_loss,
                    'dev_acc': dev_acc
                }
                print(f'On step {step}: Train loss {train_loss} | Dev acc is {dev_acc}')
                logger.writerow(step_metrics)

                # Update conditions
                diff = dev_acc - last_acc
                improving = diff > args.stepwise_cushion
                last_acc = dev_acc
                if step % 1000 == 0:
                    got_time = dev_acc - last1000 > args.timesaver_cushion
                    last1000 = dev_acc

            step += 1

        # close the log file
        log_f.close()
        # save model
        print(f'Done training. Saving model at {args.model_save}')
        torch.save(model, args.model_save)
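# NOTE: The custom scheme above reads several extra command-line options
# (train_my_way, min_iter, max_iter, stepwise_cushion, timesaver_cushion) whose
# registration is not shown in this section. One plausible argparse setup is
# sketched below; the default values are illustrative assumptions only.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--train-my-way', action='store_true',
                    help='Use the early-stopping scheme instead of a fixed step count')
parser.add_argument('--min-iter', type=int, default=1000,
                    help='Always train for at least this many steps')
parser.add_argument('--max-iter', type=int, default=10000,
                    help='Never train for more than this many steps')
parser.add_argument('--stepwise-cushion', type=float, default=-0.01,
                    help='Minimum dev-accuracy change between 100-step checks to keep training')
parser.add_argument('--timesaver-cushion', type=float, default=0.001,
                    help='Minimum dev-accuracy improvement per 1000 steps to keep training')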
        spatial_encoding=args.spatial_encoding,
    )
    maze_name = 'singlemaze'
else:
    validation_set = ValidationSet(
        data=data,
        maze_sps=maze_sps,
        maze_indices=[0, 1, 2, 3],
        goal_indices=[0, 1],
        subsample=args.subsample,
        spatial_encoding=args.spatial_encoding,
    )
    maze_name = 'multimaze'

if args.spatial_encoding == 'learned':
    # input is maze, loc, goal ssps, output is 2D direction to move
    model = LearnedEncoding(input_size=repr_dim,
                            maze_id_size=id_size,
                            hidden_size=512,
                            output_size=2)
else:
    # input is maze, loc, goal ssps, output is 2D direction to move
    model = FeedForward(input_size=id_size + repr_dim * 2, output_size=2)

if args.load_saved_model:
    model.load_state_dict(torch.load(args.load_saved_model), strict=False)

model.eval()

# Open a tensorboard writer if a logging directory is given
if args.logdir != '':
    current_time = datetime.now().strftime('%b%d_%H-%M-%S')
    save_dir = os.path.join(args.logdir, current_time)
    writer = SummaryWriter(log_dir=save_dir)

criterion = nn.MSELoss()

print("Visualization")
import sys

import torch
import torch.nn as nn

from models import FeedForward
from toy_dataset import ToyDataset, plot_data

import matplotlib.pyplot as plt
import numpy as np

fname = sys.argv[1]

n_samples = 10000
hidden_size = 512

model = FeedForward(input_size=2, hidden_size=hidden_size, output_size=4)
model.load_state_dict(torch.load(fname), strict=True)
model.eval()

dataset_test = ToyDataset(n_samples)

# For testing just do everything in one giant batch
testloader = torch.utils.data.DataLoader(
    dataset_test, batch_size=len(dataset_test), shuffle=False, num_workers=0,
)

criterion = nn.CrossEntropyLoss()

with torch.no_grad():
    # Everything is in one batch, so this loop will only happen once
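    # NOTE: the original snippet is truncated at this point. Based on the test
    # loop in the training script earlier in this section, the evaluation
    # presumably continues roughly as follows (a sketch, not the original file):
    for i, data in enumerate(testloader):
        locations, labels = data
        outputs = model(locations)
        loss = criterion(outputs, labels)
        print(loss.item())

        # plot_data (imported above) was presumably used to visualize the
        # predictions; its exact call signature is not shown in this section.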
def main():
    if not os.path.isdir(CHECKPOINT):
        os.makedirs(CHECKPOINT)

    print('==> Preparing dataset')

    trainloader, validloader, testloader = load_MNIST(batch_size=BATCH_SIZE,
                                                      num_workers=NUM_WORKERS)

    print("==> Creating model")
    model = FeedForward(num_classes=len(ALL_CLASSES))

    if CUDA:
        model = model.cuda()
        # model = nn.DataParallel(model)
        cudnn.benchmark = True

    # initialize parameters
    # for name, param in model.named_parameters():
    #     if 'bias' in name:
    #         param.data.zero_()
    #     elif 'weight' in name:
    #         param.data.normal_(0, 0.005)

    print('    Total params: %.2fK' %
          (sum(p.numel() for p in model.parameters()) / 1000))

    criterion = nn.BCELoss()

    CLASSES = []
    AUROCs = []

    for t, cls in enumerate(ALL_CLASSES):

        print('\nTask: [%d | %d]\n' % (t + 1, len(ALL_CLASSES)))

        CLASSES.append(cls)

        if t == 0:
            print("==> Learning")

            optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE,
                                  momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
            penalty = L1Penalty(coeff=L1_COEFF)

            best_loss = 1e10
            learning_rate = LEARNING_RATE
            # epochs = 10
            for epoch in range(MAX_EPOCHS):

                # decay learning rate
                if (epoch + 1) % EPOCHS_DROP == 0:
                    learning_rate *= LR_DROP
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = learning_rate

                print('Epoch: [%d | %d]' % (epoch + 1, MAX_EPOCHS))

                train_loss = train(trainloader, model, criterion, ALL_CLASSES,
                                   [cls], optimizer=optimizer, penalty=penalty,
                                   use_cuda=CUDA)
                test_loss = train(validloader, model, criterion, ALL_CLASSES,
                                  [cls], test=True, penalty=penalty,
                                  use_cuda=CUDA)

                # save model
                is_best = test_loss < best_loss
                best_loss = min(test_loss, best_loss)
                save_checkpoint({'state_dict': model.state_dict()},
                                CHECKPOINT, is_best)

                suma = 0
                for p in model.parameters():
                    p = p.data.cpu().numpy()
                    suma += (abs(p) < ZERO_THRESHOLD).sum()
                print("Number of zero weights: %d" % suma)

        else:  # if t != 0
            # copy model
            model_copy = copy.deepcopy(model)

            print("==> Selective Retraining")

            # Solve Eq.3
            # freeze all layers except the last one (last 2 parameters)
            params = list(model.parameters())
            for param in params[:-2]:
                param.requires_grad = False

            optimizer = optim.SGD(
                filter(lambda p: p.requires_grad, model.parameters()),
                lr=LEARNING_RATE, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
            penalty = L1Penalty(coeff=L1_COEFF)

            best_loss = 1e10
            learning_rate = LEARNING_RATE

            for epoch in range(MAX_EPOCHS):

                # decay learning rate
                if (epoch + 1) % EPOCHS_DROP == 0:
                    learning_rate *= LR_DROP
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = learning_rate

                print('Epoch: [%d | %d]' % (epoch + 1, MAX_EPOCHS))

                train(trainloader, model, criterion, ALL_CLASSES, [cls],
                      optimizer=optimizer, penalty=penalty, use_cuda=CUDA)
                train(validloader, model, criterion, ALL_CLASSES, [cls],
                      test=True, penalty=penalty, use_cuda=CUDA)

            for param in model.parameters():
                param.requires_grad = True

            print("==> Selecting Neurons")
            hooks = select_neurons(model, t)

            print("==> Training Selected Neurons")

            optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE,
                                  momentum=MOMENTUM, weight_decay=1e-4)

            best_loss = 1e10
            learning_rate = LEARNING_RATE

            for epoch in range(MAX_EPOCHS):

                # decay learning rate
                if (epoch + 1) % EPOCHS_DROP == 0:
                    learning_rate *= LR_DROP
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = learning_rate

                print('Epoch: [%d | %d]' % (epoch + 1, MAX_EPOCHS))

                train_loss = train(trainloader, model, criterion, ALL_CLASSES,
                                   [cls], optimizer=optimizer, use_cuda=CUDA)
                test_loss = train(validloader, model, criterion, ALL_CLASSES,
                                  [cls], test=True, use_cuda=CUDA)

                # save model
                is_best = test_loss < best_loss
                best_loss = min(test_loss, best_loss)
                save_checkpoint({'state_dict': model.state_dict()},
                                CHECKPOINT, is_best)

            # remove hooks
            for hook in hooks:
                hook.remove()

            print("==> Splitting Neurons")
            split_neurons(model_copy, model)

        print("==> Calculating AUROC")

        filepath_best = os.path.join(CHECKPOINT, "best.pt")
        checkpoint = torch.load(filepath_best)
        model.load_state_dict(checkpoint['state_dict'])

        auroc = calc_avg_AUROC(model, testloader, ALL_CLASSES, CLASSES, CUDA)

        print('AUROC: {}'.format(auroc))

        AUROCs.append(auroc)

    print('\nAverage Per-task Performance over number of tasks')
    for i, p in enumerate(AUROCs):
        print("%d: %f" % (i + 1, p))
# input is maze, loc, goal ssps, output is 2D direction to move
if args.n_hidden_layers > 1:
    model = MLP(input_size=id_size + repr_dim * 2,
                hidden_size=args.hidden_size,
                output_size=2,
                n_layers=args.n_hidden_layers)
else:
    if args.spatial_encoding == 'learned':
        model = LearnedEncoding(input_size=repr_dim,
                                maze_id_size=id_size,
                                hidden_size=args.hidden_size,
                                output_size=2)
    else:
        model = FeedForward(input_size=id_size + repr_dim * 2,
                            hidden_size=args.hidden_size,
                            output_size=2)

if args.load_saved_model:
    model.load_state_dict(torch.load(args.load_saved_model), strict=False)

# Open a tensorboard writer if a logging directory is given
if args.logdir != '':
    current_time = datetime.now().strftime('%b%d_%H-%M-%S')
    save_dir = os.path.join(args.logdir, current_time)
    writer = SummaryWriter(log_dir=save_dir)

    if args.weight_histogram:
        # Log the initial parameters
        for name, param in model.named_parameters():
            writer.add_histogram('parameters/' + name,
                                 param.clone().cpu().data.numpy(), 0)
def train(args):
    """
    This function trains the models
    :param args: the command line arguments defining the desired actions
    """
    # load data
    train_data_all, dev_data_all, _ = load(
        args.data_dir,
        cachedir=args.cachedir,
        override_cache=args.override_cache,
        text_only=(args.model.lower() in ["bi-lstm", "bert"]),
        include_tfidf=args.include_tfidf,
        balanced=args.balanced)
    train_data, train_labels = train_data_all.X, train_data_all.y
    dev_data, dev_labels = dev_data_all.X, dev_data_all.y

    # Build model
    apx = get_appendix(args.include_tfidf, args.balanced)
    if args.model.lower() == "simple-ff":
        model = FeedForward(args.ff_hunits, train_data.shape[1])
        train_pytorch(args, model,
                      train_data, train_labels,
                      dev_data, dev_labels,
                      save_model_path=f"models/simple-ff{apx}.torch")
    elif args.model.lower() == "bi-lstm":
        model = BiLSTM(epochs=args.num_epochs,
                       batch_size=args.batch_size,
                       max_seq_len=args.max_seq_len)
        model.train(train_data, train_labels, dev_data, dev_labels)
    elif args.model.lower() == "logreg":
        model = LogisticRegression()
        model.train(train_data, train_labels, dev_data, dev_labels,
                    save_model_path=f"models/logreg{apx}.pkl")
    elif args.model.lower() == "majority-vote":
        model = MajorityVote()
        model.train(train_labels, dev_labels)
    elif args.model.lower() == "bert":
        model = Bert(epochs=args.num_epochs,
                     batch_size=args.batch_size,
                     max_seq_len=args.max_seq_len,
                     learning_rate=args.learning_rate)
        model.train(train_data, train_labels, dev_data, dev_labels,
                    save_model_path=f"models/bert.pkl")
    elif args.model.lower() == "svm":
        model = SVM()
        model.train(train_data, train_labels,
                    save_model_path=f"models/svm{apx}.sav")
    else:
        raise Exception("Unknown model type passed in!")
for i in range(n_goals):
    sp_name = possible_objects[i]
    x_env, y_env = env.object_locations[sp_name][[0, 1]]

    # Need to scale to SSP coordinates
    # Env is 0 to 13, SSP is -5 to 5
    x = ((x_env - 0) / coarse_size) * limit_range + xs[0]
    y = ((y_env - 0) / coarse_size) * limit_range + ys[0]

    item_memory += vocab[sp_name] * encode_point(x, y, x_axis_sp, y_axis_sp)
item_memory.normalize()

# Component functions of the full system

cleanup_network = FeedForward(input_size=ssp_dim,
                              hidden_size=512,
                              output_size=ssp_dim)
cleanup_network.load_state_dict(torch.load(args.cleanup_network), strict=True)
cleanup_network.eval()

# Input is x and y velocity plus the distance sensor measurements, plus map ID
localization_network = LocalizationModel(
    input_size=2 + n_sensors + n_maps,
    unroll_length=1,  # rollout_length,
    sp_dim=ssp_dim)
localization_network.load_state_dict(torch.load(args.localization_network),
                                     strict=True)
localization_network.eval()

if args.n_hidden_layers_policy == 1:
    policy_network = FeedForward(input_size=id_size + ssp_dim * 2,
rel_pretrained = model.rels.weight.data

# =========================================
# Initialize MODEL
# =========================================
if args.model == 'transE':
    model = TransE(len(rel2id), len(ent2id),
                   dim=config['embedding_dim'],
                   norm=config['norm'],
                   margin=config['margin'],
                   l2reg=config['l2reg'])
elif args.model == 'transH':
    model = TransH(len(rel2id), len(ent2id),
                   dim=config['embedding_dim'],
                   norm=config['norm'],
                   margin=config['margin'],
                   l2reg=config['l2reg'])
elif args.model == 'subjD':
    model = SubjKB_Deviation(len(rel2id), len(ent2id), len(src2id),
                             dim=config['embedding_dim'],
                             norm=config['norm'],
                             margin=config['margin'],
                             l2reg=config['l2reg'],
                             relPretrained=rel_pretrained,
                             entPretrained=ent_pretrained)
elif args.model == 'subjM':
    model = SubjKB_Matrix(len(rel2id), len(ent2id), len(src2id),
                          dim=config['embedding_dim'],
                          norm=config['norm'],
                          nonlinearity='tanh')
elif args.model == 'ff':
    model = FeedForward(len(rel2id), len(ent2id),
                        dim=config['embedding_dim'])
elif args.model == 'ffs':
    model = FeedForward_Source(len(rel2id), len(ent2id), len(src2id),
                               dim=config['embedding_dim'])
elif args.model == 'hyte':
    model = HyTE(len(rel2id), len(ent2id), len(src2id),
                 dim=config['embedding_dim'],
                 norm=config['norm'],
                 margin=config['margin'],
                 l2reg=config['l2reg'])

# model.to(device)

# Logger
if args.mode.startswith('train'):
    logger = Logger(config['name'],
                    ['loss', 'val_loss', 'MR', 'MRR', 'h@10'])
else:
    logger = None

# Loss function
criterion = MarginRankingLoss(config['margin'], reduction='sum')