Example #1
0
def train(num_epochs, config, data_loader, multigpu=False):
    """Train a network built from ``config`` on ``data_loader``.

    Evaluates on epochs 99, 199, ... against both the held-out test set and
    an odd-one-out (OOO) probe set, checkpointing to 'best.model.testing'
    whenever test accuracy improves. Stops early at >= 98% test accuracy.

    Args:
        num_epochs: maximum number of training epochs.
        config: factory object providing the puzzle generator, network
            factory, and optimizer factory.
        data_loader: provides ``get_loaders(epoch)``, ``input_size()``,
            and ``output_size()``.
        multigpu: wrap the model in ``nn.DataParallel`` when several
            CUDA devices are available.

    Returns:
        (best_model, scores) where scores is a list of (epoch, test_acc)
        pairs recorded at each evaluation epoch.
    """
    def maybe_evaluate(model, epoch, prev_best, prev_best_acc):
        # Evaluate every 100th epoch (99, 199, ...); save a checkpoint
        # whenever the test accuracy improves on the previous best.
        best_model = prev_best
        best_test_acc = prev_best_acc
        test_acc = None
        if epoch % 100 == 99:
            ooo_acc = evaluate(model, ooo_loader)
            test_acc = evaluate(model, test_loader)
            print('epoch {} test: {:.2f}; ooo: {:.2f}'.format(
                epoch, test_acc, ooo_acc))
            if test_acc > prev_best_acc:
                best_test_acc = test_acc
                best_model = model
                print('saving new model')
                torch.save(best_model, 'best.model.testing')
        return best_model, best_test_acc, test_acc

    def maybe_report_time():
        # Deliberately disabled via 'False and ...'; drop the False to re-enable.
        if False and epoch % 100 == 0 and epoch > 0:
            # BUG FIX: time.clock() was removed in Python 3.8; process_time()
            # is its documented replacement (also what the sibling train() uses).
            finish_time = time.process_time()
            time_per_epoch = (finish_time - start_time) / epoch
            print('Average time per epoch: {:.2} sec'.format(time_per_epoch))

    puzzle_gen = config.create_puzzle_generator()
    ooo_dataset = OddOneOutDataset(puzzle_gen, 5, 'data/ooo/living.tsv')
    ooo_loader = OddOneOutDataloader(ooo_dataset).get_loaders()[0]

    # BUG FIX: time.clock() removed in Python 3.8 -> time.process_time().
    start_time = time.process_time()
    net_factory = config.create_network_factory()
    model = net_factory(data_loader.input_size(), data_loader.output_size())
    if multigpu and torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        #dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
        model = nn.DataParallel(model)
    model = cudaify(model)
    loss_function = nn.NLLLoss()
    optimizer = config.create_optimizer_factory()(model.parameters())
    best_model = None
    best_test_acc = -1.0
    scores = []
    for epoch in range(num_epochs):
        model.train()
        # NOTE(review): gradients are zeroed once per epoch, not per batch,
        # so they accumulate across batches -- confirm this is intentional
        # (it matches the sibling train() implementation).
        model.zero_grad()
        loader, test_loader = data_loader.get_loaders(epoch)
        for data, response in loader:
            input_matrix = cudaify(data)
            log_probs = model(input_matrix)
            loss = loss_function(log_probs, cudaify(response))
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
            optimizer.step()
        best_model, best_test_acc, test_acc = maybe_evaluate(
            model, epoch, best_model, best_test_acc)
        if test_acc is not None:
            scores.append((epoch, test_acc))
        if best_test_acc >= .98:  # early stop once test accuracy hits 98%
            break
        maybe_report_time()
    return best_model, scores
Example #2
0
def train(num_epochs, config, data_loader, multigpu=False):
    """Train a network built from ``config`` on ``data_loader``.

    Evaluates on epochs 99, 199, ... against the held-out test set,
    checkpointing to 'best.model.testing' whenever test accuracy improves.
    Stops early at >= 95% test accuracy.

    Args:
        num_epochs: maximum number of training epochs.
        config: factory object providing the network factory and
            optimizer factory.
        data_loader: provides ``get_loaders(epoch)``, ``input_size()``,
            and ``output_size()``.
        multigpu: wrap the model in ``nn.DataParallel`` when several
            CUDA devices are available.

    Returns:
        (best_model, scores) where scores is a list of (epoch, test_acc)
        pairs recorded at each evaluation epoch.
    """

    def maybe_evaluate(prev_best, prev_best_acc):
        # Evaluate every 100th epoch (99, 199, ...); save a checkpoint
        # whenever the test accuracy improves on the previous best.
        # Reads `epoch`, `model`, and `test_loader` from the enclosing scope.
        best = prev_best
        best_accuracy = prev_best_acc
        test_accuracy = None
        if epoch % 100 == 99:
            test_accuracy = evaluate(model, test_loader)
            print('epoch {} test: {:.2f}'.format(epoch, test_accuracy))
            if test_accuracy > prev_best_acc:
                best_accuracy = test_accuracy
                best = model
                print('saving new model')
                torch.save(best, 'best.model.testing')
        return best, best_accuracy, test_accuracy

    def maybe_report_time():
        # Deliberately disabled via 'False and ...'; drop the False to re-enable.
        if False and epoch % 100 == 0 and epoch > 0:
            finish_time = time.process_time()
            time_per_epoch = (finish_time - start_time) / epoch
            print('Average time per epoch: {:.2} sec'.format(time_per_epoch))

    start_time = time.process_time()
    # BUG FIX: removed `print('epoch: {}'.format(epoch))` here -- `epoch` is
    # only defined by the training loop below, so the line raised NameError
    # on every call to train().
    net_factory = config.create_network_factory()
    model = net_factory(data_loader.input_size(), data_loader.output_size())
    if multigpu and torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
        model = nn.DataParallel(model)
    model = cudaify(model)
    loss_function = nn.NLLLoss()
    optimizer = config.create_optimizer_factory()(model.parameters())
    best_model = None
    best_test_acc = -1.0
    scores = []
    # Hoisted out of the inner batch loop: set_printoptions configures a
    # global, idempotent print setting, so one call up front is equivalent.
    torch.set_printoptions(profile="full")
    for epoch in range(num_epochs):
        model.train()
        # NOTE(review): gradients are zeroed once per epoch, not per batch,
        # so they accumulate across batches -- confirm this is intentional.
        model.zero_grad()
        loader, test_loader = data_loader.get_loaders(epoch)
        for data, response in loader:
            input_matrix = cudaify(data)
            log_probs = model(input_matrix)
            loss = loss_function(log_probs, cudaify(response))
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
            optimizer.step()
        best_model, best_test_acc, test_acc = maybe_evaluate(best_model, best_test_acc)
        if test_acc is not None:
            scores.append((epoch, test_acc))
        if best_test_acc >= .95:  # early stop once test accuracy hits 95%
            break
        maybe_report_time()
    return best_model, scores
Example #3
0
 def make_puzzle_matrix(self, tok_puzzles):
     """Encode tokenized puzzles as one row of concatenated one-hot vectors each.

     For every choice, the first ``self.num_tok - 1`` tokens keep their own
     one-hot vectors; any remaining tokens are element-wise summed into the
     final slot. Each choice's encoding is zero-padded to exactly
     ``self.num_tok * len(self.vocab)`` entries.

     TODO: Is it possible to get rid of the topmost for-loop using torch
     tensor ops?
     """
     rows = []
     for choices, _ in tok_puzzles:
         row = []
         for choice in choices:
             vecs = [one_hot(tok, self.vocab) for tok in choice]
             tail_start = self.num_tok - 1
             if len(vecs) > tail_start:
                 # Collapse all trailing token vectors into the last slot,
                 # then drop everything beyond num_tok slots.
                 vecs[tail_start] = [sum(components)
                                     for components in zip(*vecs[tail_start:])]
                 del vecs[self.num_tok:]
             flat = [entry for vec in vecs for entry in vec]
             padding = [0] * (self.num_tok * len(self.vocab) - len(flat))
             row.extend(flat + padding)
         rows.append(row)
     return cudaify(FloatTensor(rows))
Example #4
0
def predict(model, input_tensor):
    """Return the argmax class index for each row of ``input_tensor``.

    Puts the model in eval mode and disables gradient tracking for the
    forward pass.
    """
    with torch.no_grad():
        model.eval()
        log_probs = model(cudaify(input_tensor))
        return log_probs.argmax(dim=1)
Example #5
0
 def make_puzzle_matrix(self, puzzles):
     """Encode puzzles as a float matrix, one row per puzzle.

     Each row is the concatenation of the one-hot encodings of that
     puzzle's choices (stringified, looked up in ``self.get_vocab()``).
     """
     rows = []
     for choices, _ in puzzles:
         encoding = []
         for choice in choices:
             encoding.extend(one_hot(str(choice), self.get_vocab()))
         rows.append(encoding)
     return cudaify(FloatTensor(rows))
Example #6
0
def make_puzzle_targets(labels):
    """Convert a sequence of integer labels into a device-ready LongTensor."""
    targets = LongTensor(labels)
    return cudaify(targets)
Example #7
0
def make_puzzle_target(label):
    """Wrap a single integer label as a one-element, device-ready LongTensor."""
    target = LongTensor([label])
    return cudaify(target)
Example #8
0
def make_puzzle_vector(puzzle, vocab):
    """Encode one puzzle as a (1, N) float tensor.

    Concatenates the one-hot encodings of the puzzle's choices
    (stringified, looked up in ``vocab``) into a single row vector.
    """
    choices, _ = puzzle
    parts = [one_hot(str(choice), vocab) for choice in choices]
    flat = [entry for part in parts for entry in part]
    return cudaify(FloatTensor(flat).view(1, -1))
Example #9
0
 def compare_tensors(self, t1, t2):
     """Assert that two tensors share a shape and are element-wise close.

     Both tensors are moved to the active device before comparison so that
     a CPU/GPU mismatch cannot cause a spurious failure.
     """
     lhs = cudaify(t1)
     rhs = cudaify(t2)
     assert lhs.shape == rhs.shape
     assert torch.allclose(lhs, rhs)