def eval(X, Y, model):
    # Run evaluation on CPU, then move the model back to GPU at the end.
    model.cpu()
    y_pred = [[], []]
    for i in range(len(X)):
        x = X[i]
        if len(x) == 0:
            # Empty sentence: fall back to neutral predictions.
            y_pred[0].append(0.)
            y_pred[1].append(0.)
        else:
            sent_tensor = Variable(sent_to_tensor(x))
            hidden = model.init_hidden()
            cell = model.init_hidden()
            # Feed the sentence token by token; keep the logits of the last step.
            for t in range(len(x)):
                logit1, logit2, hidden, cell = model(sent_tensor[t], hidden, cell)
            y_pred[0].append(logit1.data.numpy()[0])
            y_pred[1].append(logit2.data.numpy()[0])
    y1 = np.array(y_pred[0])
    y_t1 = np.array([Y[i][0] for i in range(len(Y))])
    ccc1, _ = ccc(y_t1, y1)
    mse1 = mse(y_t1, y1)
    y2 = np.array(y_pred[1])
    y_t2 = np.array([Y[i][1] for i in range(len(Y))])
    ccc2, _ = ccc(y_t2, y2)
    mse2 = mse(y_t2, y2)
    model.cuda()
    return ccc1, ccc2, mse1, mse2
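

# The `ccc` and `mse` helpers used above are defined elsewhere in this repo and
# are not shown here. As a point of reference only, the sketch below is a minimal,
# hypothetical implementation of Lin's concordance correlation coefficient with
# the same (value, extra) return shape that eval() unpacks; the repo's own
# version may differ (e.g. in what the second return value is).
def ccc_sketch(y_true, y_pred):
    """CCC = 2*cov / (var_true + var_pred + (mean_true - mean_pred)**2)."""
    mean_t, mean_p = np.mean(y_true), np.mean(y_pred)
    var_t, var_p = np.var(y_true), np.var(y_pred)
    cov = np.mean((y_true - mean_t) * (y_pred - mean_p))
    value = 2 * cov / (var_t + var_p + (mean_t - mean_p) ** 2)
    return value, None  # second element is ignored by eval()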
def load_model():
    model = torch.load(MODEL_CHECKPOINT)
    if USE_CUDA:
        model.cuda()
    else:
        model.cpu()
    return model
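

# Hypothetical usage sketch (MODEL_CHECKPOINT, USE_CUDA and the input tensor are
# placeholders assumed to be defined elsewhere): load the checkpoint once, switch
# to eval mode, and run inference without tracking gradients.
# model = load_model()
# model.eval()
# with torch.no_grad():
#     output = model(some_input)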
def predict(model, valid_data):
    model = model.cuda()
    model = model.eval()
    preds = []
    with torch.no_grad():
        for img in valid_data:
            batch_size = img.shape[0]
            img = img.cuda()
            out = model(img)
            pred = out.argmax(dim=1)
            preds.append(pred.cpu().numpy())
    preds = np.concatenate(preds, axis=0)
    model = model.cpu()
    return preds
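

# Hypothetical usage sketch for predict(): `valid_data` only needs to be an
# iterable of image batches, e.g. a DataLoader over a tensor of images. The
# names below (val_images, val_loader) are illustrative, not part of this repo.
# from torch.utils.data import DataLoader
# val_images = torch.randn(64, 3, 224, 224)      # dummy images
# val_loader = DataLoader(val_images, batch_size=16)
# preds = predict(model, val_loader)             # numpy array of class indices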
else:
    # Load GPU model on CPU
    model = torch.load(args.checkpoint,
                       map_location=lambda storage, loc: storage)

if args.finetune:
    assert args.pretrained, "you must specify a pre-trained model"
    with open(args.pretrained, 'rb') as f:
        model = torch.load(f)
    print("loaded pre-trained model...")

if args.cuda:
    model.cuda()
else:
    model.cpu()
print(model)

criterion = nn.CrossEntropyLoss()
if args.cuda:
    criterion.cuda()

###############################################################################
# Training code
###############################################################################


def repackage_hidden(h):
    """Wraps hidden states in new Variables, to detach them from their history."""
    if type(h) == Variable:
        return Variable(h.data)
    else:
        return tuple(repackage_hidden(v) for v in h)
eval_batch_size = args.batch_size // 8
train_data = batchify(corpus.train, args.batch_size)
dev_data = batchify(corpus.dev, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                       args.nlayers, args.dropout, args.tied)
if args.cuda:
    model.cuda()
else:
    model.cpu()

criterion = nn.CrossEntropyLoss()

###############################################################################
# Training code
###############################################################################


def repackage_hidden(h):
    """Wraps hidden states in new Variables, to detach them from their history."""
    if type(h) == Variable:
        return Variable(h.data)
    else:
        return tuple(repackage_hidden(v) for v in h)
def evaluate(data_dir, model_path, batch_size, chunk_size, use_cuda):
    # =====
    printing(f"Evaluating {data_dir} & {model_path}")
    # read corpus, especially for vocab
    model_dir = os.path.dirname(model_path)
    # fn = 'corpus.{}.data'.format(hashlib.md5(data_dir.encode()).hexdigest())
    fn = glob.glob(f"{model_dir}/corpus.*.data")
    assert len(fn) == 1
    fn = fn[0]
    printing(f"Loading dataset from {fn}")
    corpus = torch.load(fn)
    word2idx = corpus.dictionary.word2idx
    # load model
    with open(model_path, 'rb') as f:
        printing(f"Loading model from {f}")
        model, criterion, optimizer = torch.load(f)
    if use_cuda:
        model = model.cuda()
    else:
        model = model.cpu()
    # Turn on evaluation mode which disables dropout.
    model.eval()
    # =====
    # read test and eval
    test_file = os.path.join(data_dir, "test.txt")
    test_data = []
    with open(test_file) as fd:
        for sid, line in enumerate(fd):
            tokens = line.split() + ['<eos>']
            # there will be no oov since test is included in vocab when building?
            idxes = [word2idx[w] for w in tokens]
            one = {"sid": sid, "tokens": tokens, "idxes": idxes}
            test_data.append(one)
    # start to decode
    printing(f"Decoding with {len(test_data)} lines of data")
    # sort by length
    test_data.sort(key=lambda x: len(x["idxes"]))
    # batched evaluation
    bidx = 0
    while bidx < len(test_data):
        next_bidx = min(len(test_data), bidx + batch_size)
        cur_data = test_data[bidx:next_bidx]
        bsize = len(cur_data)
        max_length = max([len(x["idxes"]) for x in cur_data])
        # batch, 0 as padding
        cur_data_t = torch.full([bsize, max_length], 0, dtype=torch.long)
        for b in range(bsize):
            one_input = cur_data[b]["idxes"]
            cur_data_t[b][:len(one_input)] = torch.as_tensor(one_input)
        cur_data_t = cur_data_t.t().contiguous()  # [max-length, bsize]
        if use_cuda:
            cur_data_t = cur_data_t.cuda()
        # loop
        # start from the first token, but does not predict it; keep the placeholder
        # on the same device as the model outputs so torch.cat below works
        logprobs = [torch.full([1, bsize], 0., dtype=torch.float32,
                               device=cur_data_t.device)]
        hidden = model.init_hidden(bsize)
        for start_idx in range(0, cur_data_t.size(0) - 1, chunk_size):
            end_idx = min(start_idx + chunk_size, cur_data_t.size(0) - 1)
            cur_input_t = cur_data_t[start_idx:end_idx]
            cur_target_t = cur_data_t[start_idx + 1:end_idx + 1]
            output, hidden = model(cur_input_t, hidden)
            output = model.decoder(output)
            hidden = repackage_hidden(hidden)
            # get log probs
            output_logprobs = torch.nn.functional.log_softmax(
                output.view(end_idx - start_idx, bsize, -1), -1)
            cur_logprobs = output_logprobs.gather(
                -1, cur_target_t.unsqueeze(-1)).squeeze(-1)  # [len, bsize]
            logprobs.append(cur_logprobs)
        bidx = next_bidx
        # get the scores back
        final_logprobs = torch.cat(logprobs, 0).t().contiguous()  # [bsize, max-length]
        if use_cuda:
            final_logprobs = final_logprobs.cpu()
        for v, d in zip(final_logprobs, cur_data):
            d["scores"] = v[:len(d["idxes"])].tolist()
    # return
    test_data.sort(key=lambda x: x["sid"])
    return test_data
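

# Hedged follow-up sketch (not part of the original evaluate()): the per-token
# log-probabilities stored in d["scores"] can be aggregated into a sentence-level
# perplexity. The first entry is the 0.0 placeholder for the unpredicted first
# token, so it is skipped. Assumes `math` is imported in this module.
def sentence_ppl(d):
    token_scores = d["scores"][1:]
    return math.exp(-sum(token_scores) / max(1, len(token_scores)))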
                         def_arr, test_corpus.dictionary, set_zero=True)
    logging("Vocab size pre-change: {}".format(len(model.old_dict.word2idx)))
    logging("Vocab size post-change: {}".format(len(model.dict.word2idx)))
else:
    raise AssertionError(
        "new vocabulary provided but model vocab not changed or interpolated")

test_data = batchify(test_corpus.test, args.test_batch_size, args)

if args.cuda:
    model = model.cuda()
    criterion = criterion.cuda()
else:
    model = model.cpu()
    criterion = criterion.cpu()

# Run on test data.
logging("Evaluating...")
with torch.no_grad():
    if args.hyp_search is not None:
        best_score = (np.inf, 0.0, 0.0)
        scores = np.zeros((5, 6))
        import pickle
        # grid search is ok here bc for few hyperparams and small k,
        # it helps minimize gaps. also, based on Grave et al. (2016)
        # we expect lam and theta are ~equally important/sensitive here
        for i, lam in enumerate([0.833, 0.866, 0.9, 0.933, 0.966]):
            for j, theta in enumerate([0, 0.1, 0.3, 0.5, 0.7, 0.9]):
                args.lam = lam
def run(args, config, min_test_loss):
    # Change log file
    fileh = logging.FileHandler(args.logfile, 'w')
    formatter = logging.Formatter('%(levelname)s:%(name)s:%(message)s')
    fileh.setFormatter(formatter)

    logger = logging.getLogger('')  # root logger
    logger.setLevel(logging.INFO)
    # Second handler is the file logger.
    for hdlr in logger.handlers[1:]:  # remove all old handlers
        logger.removeHandler(hdlr)
    logger.addHandler(fileh)  # set the new handler

    logger = logging.getLogger('run')
    logger.info('CONFIGURATION: %s', json.dumps(config, indent=2))

    # Set the random seed manually for reproducibility.
    torch.manual_seed(args.seed)
    init_state = torch.get_rng_state()
    logger.info('rng state: %s', init_state)

    ###############################################################################
    # Load data
    ###############################################################################

    corpus = data.Corpus(args.data, args.vocab_size)

    def batchify(data, bsz):
        nbatch = data.size(0) // bsz
        data = data.narrow(0, 0, nbatch * bsz)
        data = data.view(bsz, -1).t().contiguous()
        if args.cuda:
            data = data.cuda()
        return data

    eval_batch_size = 10
    train_data = batchify(corpus.train, args.batch_size)
    val_data = batchify(corpus.valid, eval_batch_size)
    test_data = batchify(corpus.test, eval_batch_size)

    ###############################################################################
    # Build the model
    ###############################################################################

    def load_embedding(corpus, glove_file="data/glove/glove.6B.{0}d.txt",
                       line_to_load=100000):
        """Function that populates a dictionary with word embedding vectors"""
        # resolve glove file
        glove_file = glove_file.format(args.emsize)
        if not os.path.exists(glove_file):
            logger.error("glove_file {0} not exist!".format(glove_file))
            raise ValueError("glove_file {0} not exist!".format(glove_file))
        ctr = 0
        # This is the thing to return
        word_emb = np.random.uniform(-0.1, 0.1,
                                     size=(len(corpus.dictionary), args.emsize))
        found_words = 0
        with open(glove_file, "r") as f:
            for i, line in enumerate(f):
                ctr += 1
                contents = line.split()
                word = contents[0].lower()
                if word in corpus.dictionary.word2idx:
                    idx = corpus.dictionary.word2idx[word]
                    word_emb[idx, :] = np.asarray(contents[1:]).astype(float)
                    found_words += 1
                if ctr >= line_to_load:
                    break
        logger.info('found: %d', found_words)
        return torch.Tensor(word_emb)

    ntokens = len(corpus.dictionary)
    preload_emb = (load_embedding(corpus)
                   if args.initialization["word_embedding"] == "glove" else None)
    model = RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers,
                     emb_init_method=args.initialization["word_embedding"],
                     weight_init_method=args.initialization["weights"],
                     preload_emb=preload_emb, dropout=args.dropout)
    criterion = nn.CrossEntropyLoss()
    if args.cuda:
        model = model.cuda()

    if args.optim == 'adam':
        opt = O.Adam(model.parameters(), lr=args.lr,
                     weight_decay=args.weight_decay)
    else:
        opt = O.SGD(model.parameters(), lr=args.lr, momentum=args.momentum,
                    weight_decay=args.weight_decay)

    ###############################################################################
    # Training code
    ###############################################################################

    def clip_gradient(model, clip):
        """Computes a gradient clipping coefficient based on gradient norm."""
        totalnorm = 0
        for p in model.parameters():
            modulenorm = p.grad.data.norm()
            totalnorm += modulenorm ** 2
        totalnorm = math.sqrt(totalnorm)
        return min(1, args.clip / (totalnorm + 1e-6))

    def repackage_hidden(h):
        """Wraps hidden states in new Variables, to detach them from their
history.""" if type(h) == Variable: return Variable(h.data) else: return tuple(repackage_hidden(v) for v in h) def get_batch(source, i, evaluation=False): seq_len = min(args.sequence_length, len(source) - 1 - i) data = Variable(source[i:i + seq_len], volatile=evaluation) target = Variable(source[i + 1:i + 1 + seq_len].view(-1)) return data, target def evaluate(data_source): total_loss = 0 model.eval() ntokens = len(corpus.dictionary) hidden = model.init_hidden(eval_batch_size) for i in range(0, data_source.size(0) - 1, args.sequence_length): data, targets = get_batch(data_source, i, evaluation=True) output, hidden = model(data, hidden) output_flat = output.view(-1, ntokens) total_loss += len(data) * criterion(output_flat, targets).data hidden = repackage_hidden(hidden) return total_loss[0] / len(data_source) def train(): model.train() total_loss = 0 start_time = time.time() ntokens = len(corpus.dictionary) hidden = model.init_hidden( args.batch_size, hidden_init_method=args.initialization["hidden_state"]) iter_idx = range(0, train_data.size(0) - 1, args.sequence_length) if args.shuffle: np.random.shuffle(iter_idx) for batch, i in enumerate(iter_idx): data, targets = get_batch(train_data, i) hidden = repackage_hidden(hidden) model.zero_grad() output, hidden = model(data, hidden) loss = criterion(output.view(-1, ntokens), targets) loss.backward() clipped_lr = lr * clip_gradient(model, args.clip) for param_group in opt.param_groups: param_group['lr'] = clipped_lr opt.step() total_loss += loss.data if batch % args.log_interval == 0 and batch > 0: cur_loss = total_loss[0] / args.log_interval elapsed = time.time() - start_time ppl = 0 try: ppl = math.exp(cur_loss) except OverflowError: ppl = float('inf') logger.info( '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | ' 'loss {:5.2f} | ppl {:8.2f}'.format( epoch, batch, len(train_data) // args.sequence_length, lr, elapsed * 1000 / args.log_interval, cur_loss, ppl)) total_loss = 0 start_time = time.time() # Loop over epochs. lr = args.lr prev_val_loss = None epoch_logs = [] for epoch in range(1, args.epochs + 1): epoch_start_time = time.time() train() val_loss = evaluate(val_data) logger.info('-' * 89) time_s = time.time() - epoch_start_time logger.info( '| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | ' 'valid ppl {:8.2f}'.format(epoch, time_s, val_loss, math.exp(val_loss))) logger.info('-' * 89) epoch_logs.append({ 'epoch': epoch, 'time_s': time_s, 'val_loss': val_loss, 'val_ppl': math.exp(val_loss) }) # Anneal the learning rate. if prev_val_loss and val_loss > prev_val_loss: lr /= 4.0 logger.info('new learning rate: {}'.format(lr)) if lr < args.min_lr: logger.info('learning rate too small') break prev_val_loss = val_loss if epoch % 6 == 0: with open('models/snapshot.pt', 'wb') as f: torch.save(model, f) logger.info('saved snapshot model.') # Run on test data and save the model. test_loss = evaluate(test_data) logger.info('=' * 89) logger.info( '| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format( test_loss, math.exp(test_loss))) logger.info('=' * 89) if args.save != '' and test_loss < min_test_loss: mcpu = model.cpu() with open(args.save, 'wb') as f: torch.save(mcpu, f) with open('models/best_model.pt', 'wb') as f: torch.save(mcpu, f) # Log results in a machine-readable JSON. 
    result = {}
    result['config'] = config
    result['epoch_logs'] = epoch_logs
    result['test_loss'] = test_loss
    result['test_ppl'] = math.exp(test_loss)
    with open(args.results, 'w') as r:
        json.dump(result, r, indent=2)

    # Revert random state.
    torch.set_rng_state(init_state)

    return test_loss