Example #1
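The imports below are a sketch of what these examples appear to assume; RRN, get_performance, encode_input, encode_output, and SUDOKU_PATH come from the surrounding Sudoku project (Examples #2 and #3 additionally use os and torch.nn):

import pickle
import time
from datetime import datetime

import numpy as np
import torch
import torch.optim as optim
from tqdm import tqdm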
def train_rrn(hyperparameters: dict, data: dict):
    model_name = hyperparameters['model_name']
    device = hyperparameters['device']
    dim_x = hyperparameters['dim_x']
    dim_y = hyperparameters['dim_y']
    num_iters = hyperparameters['num_iters']
    train_size = hyperparameters['train_size']
    valid_size = hyperparameters['valid_size']
    test_size = hyperparameters['test_size']
    batch_size = hyperparameters['batch_size']
    epochs = hyperparameters['epochs']
    save_epochs = hyperparameters['save_epochs']
    embed_size = hyperparameters['embed_size']
    hidden_layer_size = hyperparameters['hidden_layer_size']
    learning_rate = hyperparameters['learning_rate']
    weight_decay = hyperparameters['weight_decay']

    train_inputs = data['train_inputs']
    train_outputs = data['train_outputs']
    valid_inputs = data['valid_inputs']
    valid_outputs = data['valid_outputs']
    test_inputs = data['test_inputs']
    test_outputs = data['test_outputs']

    all_train_x = torch.stack([encode_input(p) for p in train_inputs])
    all_train_y = torch.stack([encode_output(p) for p in train_outputs])
    all_valid_x = torch.stack([encode_input(p) for p in valid_inputs])
    all_valid_y = torch.stack([encode_output(p) for p in valid_outputs])
    all_test_x = torch.stack([encode_input(p) for p in test_inputs])
    all_test_y = torch.stack([encode_output(p) for p in test_outputs])

    model = RRN(dim_x=dim_x,
                dim_y=dim_y,
                embed_size=embed_size,
                hidden_layer_size=hidden_layer_size).cuda(device)
    optimizer = optim.Adam(model.parameters(),
                           lr=learning_rate,
                           weight_decay=weight_decay)

    train_losses = []  # epoch x batch
    train_accuracies = []  # epoch x batch x grid x timestep
    valid_losses = []  # epoch x batch
    valid_accuracies = []  # epoch x batch x grid x timestep
    times = []

    train_x = all_train_x[:train_size].cuda(device)
    train_y = all_train_y[:train_size].cuda(device)
    valid_x = all_valid_x[:valid_size].cuda(device)
    valid_y = all_valid_y[:valid_size].cuda(device)
    test_x = all_test_x[:test_size].cuda(device)
    test_y = all_test_y[:test_size].cuda(device)

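    # Note: Adam calls this closure exactly once per optimizer.step(closure),
    # so each "epoch" below performs a single parameter update using gradients
    # accumulated over every batch (zero_grad runs only at the start).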
    def closure():
        optimizer.zero_grad()
        total_loss = 0
        shuffle_indices = np.arange(len(train_x))
        np.random.shuffle(shuffle_indices)
        for i in tqdm(range(0, len(train_x), batch_size), leave=False):
            x_batch = train_x[shuffle_indices[i:i + batch_size]]
            y_batch = train_y[shuffle_indices[i:i + batch_size]]
            loss, accuracies = get_performance(model, x_batch, y_batch,
                                               num_iters)
            loss.backward()
            total_loss += loss.detach()  # detach so summed losses don't keep autograd graphs alive

            train_losses[-1].append(float(loss))
            train_accuracies[-1].append(accuracies)
        return total_loss

    for i in tqdm(range(epochs)):
        start_time_str = datetime.now().strftime("%m/%d/%Y %H:%M:%S")
        start_time = time.time()

        train_losses.append([])
        train_accuracies.append([])

        train_loss = optimizer.step(closure)
        train_accuracies[-1] = np.array(train_accuracies[-1])
        valid_loss, valid_accuracy = get_performance(model, valid_x, valid_y,
                                                     num_iters)
        valid_losses.append(float(valid_loss))
        valid_accuracies.append(valid_accuracy)

        train_loss = round(float(train_loss), 3)
        train_accuracy = round(np.mean(train_accuracies[-1][:, :, -1]), 3)
        valid_loss = round(valid_losses[-1], 3)
        valid_accuracy = round(np.mean(valid_accuracies[-1][:, -1]), 3)

        end_time_str = datetime.now().strftime("%m/%d/%Y %H:%M:%S")
        end_time = time.time()
        runtime = end_time - start_time
        times.append({
            'start_time': start_time_str,
            'end_time': end_time_str,
            'runtime': runtime
        })
        print("({}s): Iter {}\t| TrLoss {}\t| VLoss {}\t| TrAcc {}\t| VAcc {}".
              format(round(runtime, 1), i, train_loss, valid_loss,
                     train_accuracy, valid_accuracy))

        if (i + 1) % save_epochs == 0:
            model_filename = SUDOKU_PATH + "/models/{}_{}.mdl".format(
                model_name, i + 1)
            train_data_filename = SUDOKU_PATH + "/pickles/{}.pkl".format(
                model_name)
            print("Saving model to {}".format(model_filename))
            torch.save(model.state_dict(), model_filename)
            with open(train_data_filename, 'wb') as f:
                pickle.dump(
                    {
                        'hyperparameters': hyperparameters,
                        'train_losses': train_losses,
                        'train_accuracies': train_accuracies,
                        'valid_losses': valid_losses,
                        'valid_accuracies': valid_accuracies,
                        'times': times
                    }, f)
            test_loss, test_accuracy = get_performance(model, test_x, test_y,
                                                       num_iters)
            test_loss = round(float(test_loss), 3)
            test_accuracy = round(np.mean(test_accuracy[:, -1]), 3)
            print("TeLoss {}\t| TeAcc {}".format(test_loss, test_accuracy))

    return model
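
A minimal usage sketch for the example above (hypothetical values; the dataset lists and the project helpers RRN, encode_input, encode_output, get_performance, and SUDOKU_PATH are assumed from the surrounding Sudoku code):

hyperparameters = {
    'model_name': 'rrn_sudoku',
    'device': 0,                      # CUDA device index
    'dim_x': 3, 'dim_y': 3,           # box dimensions of a 9x9 grid
    'num_iters': 32,                  # message-passing steps per forward pass
    'train_size': 9000, 'valid_size': 500, 'test_size': 500,
    'batch_size': 128,
    'epochs': 100,
    'save_epochs': 10,                # checkpoint every 10 epochs
    'embed_size': 16,
    'hidden_layer_size': 96,
    'learning_rate': 1e-3,
    'weight_decay': 1e-4,
}
data = {
    'train_inputs': train_inputs, 'train_outputs': train_outputs,
    'valid_inputs': valid_inputs, 'valid_outputs': valid_outputs,
    'test_inputs': test_inputs, 'test_outputs': test_outputs,
}
model = train_rrn(hyperparameters, data)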
Example #2
def train_rrn(hyperparameters: dict,
              train_inputs: list,
              train_outputs: list,
              other_inputs: dict = None,
              other_outputs: dict = None):
    """
    :param hyperparameters: Check below for what fields must exist in hyperparameters
    :param train_inputs: list of GridStrings
    :param train_outputs: list of GridStrings, corresponding in index to train_inputs
    :param other_inputs: dictionary of GridStrings where the key is name of the dataset
    :param other_outputs: dictionary of GridStrings where the key is name of the dataset,
        corresponding in index to inputs of same name
    :return:
    """

    if other_inputs is None:
        other_inputs = {}
    if other_outputs is None:
        other_outputs = {}
    assert set(other_inputs.keys()) == set(other_outputs.keys())

    if not os.path.exists('./checkpoints'):
        os.makedirs('./checkpoints')
    if not os.path.exists('./logs'):
        os.makedirs('./logs')

    dim_x = hyperparameters['dim_x']
    dim_y = hyperparameters['dim_y']
    num_iters = hyperparameters['num_iters']
    batch_size = hyperparameters['batch_size']
    epochs = hyperparameters['epochs']
    valid_epochs = hyperparameters['valid_epochs']
    save_epochs = hyperparameters['save_epochs']
    embed_size = hyperparameters['embed_size']
    hidden_layer_size = hyperparameters['hidden_layer_size']
    learning_rate = hyperparameters['learning_rate']
    weight_decay = hyperparameters['weight_decay']
    parallel = False

    if 'devices' in hyperparameters:
        if len(hyperparameters['devices']) > 1:
            devices = hyperparameters['devices']
            parallel = True
        device = hyperparameters['devices'][0]
    else:
        device = hyperparameters['device']

    train_x = torch.stack([encode_input(p) for p in train_inputs]).cuda(device)
    train_y = torch.stack([encode_output(p)
                           for p in train_outputs]).cuda(device)

    other_x = {}
    other_y = {}
    for k in other_inputs:
        other_x[k] = torch.stack([encode_input(p)
                                  for p in other_inputs[k]]).cuda(device)
        other_y[k] = torch.stack([encode_output(p)
                                  for p in other_outputs[k]]).cuda(device)

    model = RRN(dim_x=dim_x,
                dim_y=dim_y,
                embed_size=embed_size,
                hidden_layer_size=hidden_layer_size).cuda(device)
    if parallel:
        model = nn.DataParallel(model, device_ids=devices)

    optimizer = optim.Adam(model.parameters(),
                           lr=learning_rate,
                           weight_decay=weight_decay)

    train_losses = []  # (epoch, )
    train_accuracies = []  # (epoch, grid, timestep)
    other_losses = {name: [] for name in other_x}  # (epoch, )
    other_accuracies = {name: []
                        for name in other_x}  # (epoch, grid, timestep)
    times = []

    def closure():
        optimizer.zero_grad()
        total_loss = 0
        epoch_accuracies = []
        shuffle_indices = np.arange(len(train_x))
        np.random.shuffle(shuffle_indices)
        for i in tqdm(range(0, len(train_x), batch_size), leave=False):
            x_batch = train_x[shuffle_indices[i:i + batch_size]]
            y_batch = train_y[shuffle_indices[i:i + batch_size]]
            loss, accuracies = get_performance(model=model,
                                               x=x_batch,
                                               y=y_batch,
                                               no_grad=False,
                                               num_iters=num_iters)
            loss.backward()
            total_loss += loss.detach()  # detach so summed losses don't keep autograd graphs alive
            epoch_accuracies.append(accuracies)  # record every batch, not just the last

        train_losses.append(float(total_loss))
        train_accuracies.append(np.concatenate(epoch_accuracies))
        return total_loss

    for i in tqdm(range(epochs)):
        start_time_str = datetime.now().strftime("%m/%d/%Y %H:%M:%S")
        start_time = time.time()

        train_loss = optimizer.step(closure)

        run_validate = i == 0 or (i + 1) % valid_epochs == 0
        if run_validate:
            for name in other_x:
                loss, accuracy = get_performance(model=model,
                                                 x=other_x[name],
                                                 y=other_y[name],
                                                 num_iters=num_iters,
                                                 no_grad=True)
                other_losses[name].append(float(loss))
                other_accuracies[name].append(accuracy)

        if (i + 1) % save_epochs == 0:
            model_filename = "./checkpoints/epoch_{}.mdl".format(i + 1)
            train_data_filename = "./logs/training.pkl"
            print("Saving model to {}".format(model_filename))
            torch.save(model.state_dict(), model_filename)
            with open(train_data_filename, 'wb') as f:
                pickle.dump(
                    {
                        'hyperparameters': hyperparameters,
                        'train_losses': train_losses,
                        'train_accuracies': train_accuracies,
                        'other_losses': other_losses,
                        'other_accuracies': other_accuracies,
                        'times': times
                    }, f)

        end_time_str = datetime.now().strftime("%m/%d/%Y %H:%M:%S")
        end_time = time.time()
        runtime = end_time - start_time
        times.append({
            'start_time': start_time_str,
            'end_time': end_time_str,
            'runtime': runtime
        })
        print("duration: {}s\t iter: {}\t| loss: {}\t| accuracy: {}".format(
            round(runtime, 1), i, round(float(train_loss), 3),
            round(np.mean(train_accuracies[-1][:, -1]), 3)))
        if run_validate:
            for name in sorted(other_x):
                print("data: {}\t| loss: {}\t| accuracy: {}".format(
                    name, round(other_losses[name][-1], 3),
                    round(np.mean(other_accuracies[name][-1][:, -1]), 3)))

    model_filename = "./model.mdl"
    print("Saving model to {}".format(model_filename))
    torch.save(model.state_dict(), model_filename)
    return model
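
A hypothetical call for this variant: the training set is passed directly, extra evaluation sets go in the other_* dicts keyed by dataset name, and a 'devices' list with more than one entry enables nn.DataParallel (values below are illustrative):

hyperparameters = {
    'dim_x': 3, 'dim_y': 3,
    'num_iters': 32,
    'batch_size': 128,
    'epochs': 100,
    'valid_epochs': 5,                # evaluate the other_* sets every 5 epochs
    'save_epochs': 10,
    'embed_size': 16,
    'hidden_layer_size': 96,
    'learning_rate': 1e-3,
    'weight_decay': 1e-4,
    'devices': [0, 1],                # more than one entry turns on nn.DataParallel
}
model = train_rrn(hyperparameters,
                  train_inputs,
                  train_outputs,
                  other_inputs={'valid': valid_inputs, 'test': test_inputs},
                  other_outputs={'valid': valid_outputs, 'test': test_outputs})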
Example #3
other_x = {}
other_y = {}
for k in other_inputs:
    other_x[k] = torch.stack(
        [rrn_utils.encode_input(p) for p in other_inputs[k]]).cuda(device)
    other_y[k] = torch.stack(
        [rrn_utils.encode_output(p) for p in other_outputs[k]]).cuda(device)

# model = EmbedRRN(dim_x=dim_x, dim_y=dim_y, embed_size=embed_size, hidden_layer_size=hidden_layer_size).cuda(device)
model = RRN(dim_x=dim_x,
            dim_y=dim_y,
            embed_size=embed_size,
            hidden_layer_size=hidden_layer_size).cuda(device)

optimizer = optim.Adam(model.parameters(),
                       lr=learning_rate,
                       weight_decay=weight_decay)


def closure():
    optimizer.zero_grad()
    total_loss = 0
    epoch_accuracies = []
    shuffle_indices = np.arange(len(train_x))
    np.random.shuffle(shuffle_indices)

    for i in tqdm(range(0, len(train_x), batch_size), leave=False):
        x_batch = train_x[shuffle_indices[i:i + batch_size]]