def __init__(self, num_steps, embed_size=16, hidden_dim=96, edge_drop=0.1):
    """Build the Sudoku recurrent relational network.

    :param num_steps: number of message-passing steps run by the RRN.
    :param embed_size: width of the digit / row / column embeddings.
    :param hidden_dim: hidden width shared by the MLPs and the LSTM cell.
    :param edge_drop: edge-dropout probability forwarded to the RRN.
    """
    super(SudokuNN, self).__init__()
    self.num_steps = num_steps

    # One embedding table per cell value (10 entries — presumably 0 encodes
    # an empty cell; confirm against the data encoding) and per row/column
    # position; their concatenation feeds the input MLP below.
    self.digit_embed = nn.Embedding(10, embed_size)
    self.row_embed = nn.Embedding(9, embed_size)
    self.col_embed = nn.Embedding(9, embed_size)

    def _mlp(in_dim):
        # Four Linear layers with ReLU between them and no final activation
        # (the shape both the input layer and the message layer share).
        return nn.Sequential(
            nn.Linear(in_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
        )

    # NOTE: modules are created in the same order as the original code so
    # parameter initialisation consumes the RNG stream identically.
    self.input_layer = _mlp(3 * embed_size)
    self.lstm = nn.LSTMCell(hidden_dim * 2, hidden_dim, bias=False)
    self.rrn = RRN(_mlp(2 * hidden_dim), self.node_update_func, num_steps,
                   edge_drop)
    self.output_layer = nn.Linear(hidden_dim, 10)
    self.loss_func = nn.CrossEntropyLoss()
def train_rrn(hyperparameters: dict,
              train_inputs: list,
              train_outputs: list,
              other_inputs: dict = None,
              other_outputs: dict = None):
    """Train an RRN on sudoku grids, periodically evaluating on named datasets.

    :param hyperparameters: Check below for what fields must exist in hyperparameters
    :param train_inputs: list of GridStrings
    :param train_outputs: list of GridStrings, corresponding in index to train_inputs
    :param other_inputs: dictionary of GridStrings where the key is name of the dataset
    :param other_outputs: dictionary of GridStrings where the key is name of the
        dataset, corresponding in index to inputs of same name
    :return: the trained model (state dict also saved to ./model.mdl)
    """
    if other_inputs is None:
        other_inputs = {}
    if other_outputs is None:
        other_outputs = {}
    # Every named evaluation set needs both its inputs and its outputs.
    assert set(other_inputs.keys()) == set(other_outputs.keys())

    # Checkpoints and training logs are written relative to the CWD.
    if not os.path.exists('./checkpoints'):
        os.makedirs('./checkpoints')
    if not os.path.exists('./logs'):
        os.makedirs('./logs')

    dim_x = hyperparameters['dim_x']
    dim_y = hyperparameters['dim_y']
    num_iters = hyperparameters['num_iters']
    batch_size = hyperparameters['batch_size']
    epochs = hyperparameters['epochs']
    valid_epochs = hyperparameters['valid_epochs']
    save_epochs = hyperparameters['save_epochs']
    embed_size = hyperparameters['embed_size']
    hidden_layer_size = hyperparameters['hidden_layer_size']
    learning_rate = hyperparameters['learning_rate']
    weight_decay = hyperparameters['weight_decay']

    # Device selection: a 'devices' list with more than one id enables
    # DataParallel; the first id is always the primary device. Otherwise a
    # single 'device' entry is required.
    parallel = False
    if 'devices' in hyperparameters:
        if len(hyperparameters['devices']) > 1:
            devices = hyperparameters['devices']
            parallel = True
        device = hyperparameters['devices'][0]
    else:
        device = hyperparameters['device']

    train_x = torch.stack([encode_input(p) for p in train_inputs]).cuda(device)
    train_y = torch.stack([encode_output(p) for p in train_outputs]).cuda(device)
    other_x = {}
    other_y = {}
    for k in other_inputs:
        other_x[k] = torch.stack([encode_input(p) for p in other_inputs[k]]).cuda(device)
        other_y[k] = torch.stack([encode_output(p) for p in other_outputs[k]]).cuda(device)

    model = RRN(dim_x=dim_x, dim_y=dim_y, embed_size=embed_size,
                hidden_layer_size=hidden_layer_size).cuda(device)
    if parallel:
        # Replicate across the requested GPUs; the model already sits on devices[0].
        model = nn.DataParallel(model, device_ids=devices)
    # else:
    #     model = model.cuda(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    train_losses = []  # (epoch, )
    train_accuracies = []  # (epoch, grid, timestep)
    other_losses = {name: [] for name in other_x}  # (epoch, )
    other_accuracies = {name: [] for name in other_x}  # (epoch, grid, timestep)
    times = []

    def closure():
        # One full pass over the shuffled training set. Gradients from every
        # batch accumulate (zero_grad is only called here) before the single
        # optimizer.step(closure) in the epoch loop applies them.
        optimizer.zero_grad()
        total_loss = 0
        epoch_accuracies = []
        shuffle_indices = np.arange(len(train_x))
        np.random.shuffle(shuffle_indices)
        for i in tqdm(range(0, len(train_x), batch_size), leave=False):
            x_batch = train_x[shuffle_indices[i:i + batch_size]]
            y_batch = train_y[shuffle_indices[i:i + batch_size]]
            loss, accuracies = get_performance(model=model,
                                               x=x_batch,
                                               y=y_batch,
                                               no_grad=False,
                                               num_iters=num_iters)
            loss.backward()
            total_loss += loss
            # NOTE(review): appended once per batch, so this records a running
            # cumulative loss rather than one value per epoch as the
            # "(epoch, )" comment above suggests — confirm intent.
            train_losses.append(float(total_loss))
            epoch_accuracies.append(accuracies)
        train_accuracies.append(np.concatenate(epoch_accuracies))
        return total_loss

    for i in tqdm(range(epochs)):
        start_time_str = datetime.now().strftime("%m/%d/%Y %H:%M:%S")
        start_time = time.time()
        train_loss = optimizer.step(closure)

        # Evaluate the extra datasets on the first epoch and then every
        # valid_epochs epochs.
        run_validate = i == 0 or (i + 1) % valid_epochs == 0
        if run_validate:
            for name in other_x:
                loss, accuracy = get_performance(model=model,
                                                 x=other_x[name],
                                                 y=other_y[name],
                                                 num_iters=num_iters,
                                                 no_grad=True)
                other_losses[name].append(float(loss))
                other_accuracies[name].append(accuracy)

        if (i + 1) % save_epochs == 0:
            model_filename = "./checkpoints/epoch_{}.mdl".format(i + 1)
            train_data_filename = "./logs/training.pkl"
            print("Saving model to {}".format(model_filename))
            torch.save(model.state_dict(), model_filename)
            # The log pickle is rewritten in full at every checkpoint.
            with open(train_data_filename, 'wb') as f:
                pickle.dump(
                    {
                        'hyperparameters': hyperparameters,
                        'train_losses': train_losses,
                        'train_accuracies': train_accuracies,
                        'other_losses': other_losses,
                        'other_accuracies': other_accuracies,
                        'times': times
                    }, f)

        end_time_str = datetime.now().strftime("%m/%d/%Y %H:%M:%S")
        end_time = time.time()
        runtime = end_time - start_time
        times.append({
            'start_time': start_time_str,
            'end_time': end_time_str,
            'runtime': runtime
        })
        # [:, -1] selects the accuracy after the final message-passing step.
        print("duration: {}s\t iter: {}\t| loss: {}\t| accuracy: {}".format(
            round(runtime, 1), i, round(float(train_loss), 3),
            round(np.mean(train_accuracies[-1][:, -1]), 3)))
        if run_validate:
            for name in sorted(other_x):
                print("data: {}\t| loss: {}\t| accuracy: {}".format(
                    name, round(other_losses[name][-1], 3),
                    round(np.mean(other_accuracies[name][-1][:, -1]), 3)))

    model_filename = "./model.mdl"
    print("Saving model to {}".format(model_filename))
    torch.save(model.state_dict(), model_filename)
    return model
def train_rrn(hyperparameters: dict, data: dict):
    """Train an RRN on pre-split sudoku data and report train/valid/test metrics.

    NOTE(review): if this shares a module with the earlier train_rrn
    definition, this later definition silently replaces it — confirm.

    :param hyperparameters: dict of settings (model name/dimensions, split
        sizes, epochs, optimizer settings); see the unpacking below for the
        required keys.
    :param data: dict with 'train_inputs'/'train_outputs',
        'valid_inputs'/'valid_outputs' and 'test_inputs'/'test_outputs'.
    :return: the trained model
    """
    model_name = hyperparameters['model_name']
    device = hyperparameters['device']
    dim_x = hyperparameters['dim_x']
    dim_y = hyperparameters['dim_y']
    num_iters = hyperparameters['num_iters']
    train_size = hyperparameters['train_size']
    valid_size = hyperparameters['valid_size']
    test_size = hyperparameters['test_size']
    batch_size = hyperparameters['batch_size']
    epochs = hyperparameters['epochs']
    save_epochs = hyperparameters['save_epochs']
    embed_size = hyperparameters['embed_size']
    hidden_layer_size = hyperparameters['hidden_layer_size']
    learning_rate = hyperparameters['learning_rate']
    weight_decay = hyperparameters['weight_decay']

    train_inputs = data['train_inputs']
    train_outputs = data['train_outputs']
    valid_inputs = data['valid_inputs']
    valid_outputs = data['valid_outputs']
    test_inputs = data['test_inputs']
    test_outputs = data['test_outputs']

    # Encode every split on the CPU; only the truncated subsets below are
    # moved to the GPU.
    all_train_x = torch.stack([encode_input(p) for p in train_inputs])
    all_train_y = torch.stack([encode_output(p) for p in train_outputs])
    all_valid_x = torch.stack([encode_input(p) for p in valid_inputs])
    all_valid_y = torch.stack([encode_output(p) for p in valid_outputs])
    all_test_x = torch.stack([encode_input(p) for p in test_inputs])
    all_test_y = torch.stack([encode_output(p) for p in test_outputs])

    model = RRN(dim_x=dim_x, dim_y=dim_y, embed_size=embed_size,
                hidden_layer_size=hidden_layer_size).cuda(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    train_losses = []  # epoch x batch
    train_accuracies = []  # epoch x batch x grid x timestep
    valid_losses = []  # epoch x batch
    valid_accuracies = []  # epoch x batch x grid x timestep
    times = []

    # Truncate each split to its requested size before moving it to the GPU.
    train_x = all_train_x[:train_size].cuda(device)
    train_y = all_train_y[:train_size].cuda(device)
    valid_x = all_valid_x[:valid_size].cuda(device)
    valid_y = all_valid_y[:valid_size].cuda(device)
    test_x = all_test_x[:test_size].cuda(device)
    test_y = all_test_y[:test_size].cuda(device)

    def closure():
        # One shuffled pass over the training set; gradients from all batches
        # accumulate until the enclosing optimizer.step() applies them at once.
        optimizer.zero_grad()
        total_loss = 0
        shuffle_indices = np.arange(len(train_x))
        np.random.shuffle(shuffle_indices)
        for i in tqdm(range(0, len(train_x), batch_size), leave=False):
            x_batch = train_x[shuffle_indices[i:i + batch_size]]
            y_batch = train_y[shuffle_indices[i:i + batch_size]]
            loss, accuracies = get_performance(model, x_batch, y_batch, num_iters)
            loss.backward()
            total_loss += loss
            # Per-batch records go into the sub-list the epoch loop appended.
            train_losses[-1].append(float(loss))
            train_accuracies[-1].append(accuracies)
        return total_loss

    for i in tqdm(range(epochs)):
        start_time_str = datetime.now().strftime("%m/%d/%Y %H:%M:%S")
        start_time = time.time()
        # Fresh per-epoch sub-lists that closure() fills batch by batch.
        train_losses.append([])
        train_accuracies.append([])
        train_loss = optimizer.step(closure)
        train_accuracies[-1] = np.array(train_accuracies[-1])
        # NOTE(review): unlike the earlier train_rrn, no no_grad flag is
        # passed for validation — confirm get_performance's default.
        valid_loss, valid_accuracy = get_performance(model, valid_x, valid_y, num_iters)
        valid_losses.append(float(valid_loss))
        valid_accuracies.append(valid_accuracy)
        # NOTE(review): duplicate of the conversion above — redundant but
        # harmless (np.array of an ndarray).
        train_accuracies[-1] = np.array(train_accuracies[-1])

        train_loss = round(float(train_loss), 3)
        # The trailing -1 index picks the accuracy after the final
        # message-passing step.
        train_accuracy = round(np.mean(train_accuracies[-1][:, :, -1]), 3)
        valid_loss = round(valid_losses[-1], 3)
        valid_accuracy = round(np.mean(valid_accuracies[-1][:, -1]), 3)

        end_time_str = datetime.now().strftime("%m/%d/%Y %H:%M:%S")
        end_time = time.time()
        runtime = end_time - start_time
        times.append({
            'start_time': start_time_str,
            'end_time': end_time_str,
            'runtime': runtime
        })
        print("({}s): Iter {}\t| TrLoss {}\t| VLoss {}\t| TrAcc {}\t| VAcc {}".
              format(round(runtime, 1), i, train_loss, valid_loss,
                     train_accuracy, valid_accuracy))

        if (i + 1) % save_epochs == 0:
            model_filename = SUDOKU_PATH + "/models/{}_{}.mdl".format(
                model_name, i + 1)
            train_data_filename = SUDOKU_PATH + "/pickles/{}.pkl".format(
                model_name)
            print("Saving model to {}".format(model_filename))
            torch.save(model.state_dict(), model_filename)
            # The log pickle is rewritten in full at each save point.
            with open(train_data_filename, 'wb') as f:
                pickle.dump(
                    {
                        'hyperparameters': hyperparameters,
                        'train_losses': train_losses,
                        'train_accuracies': train_accuracies,
                        'valid_losses': valid_losses,
                        'valid_accuracies': valid_accuracies,
                        'times': times
                    }, f)

    # Final held-out evaluation after training completes.
    test_loss, test_accuracy = get_performance(model, test_x, test_y, num_iters)
    test_loss = round(float(test_loss), 3)
    test_accuracy = round(np.mean(test_accuracy[:, -1]), 3)
    print("TeLoss {}\t| TeAcc {}".format(test_loss, test_accuracy))
    return model
# NOTE(review): fragment — the enclosing function's signature is outside this
# chunk, and closure() is cut off mid-body. Names such as train_inputs,
# other_inputs, device, dim_x/dim_y and the hyperparameters come from the
# missing surrounding scope.

# Encode the puzzles and move them straight onto the training device.
train_x = torch.stack([rrn_utils.encode_input(p)
                       for p in train_inputs]).cuda(device)
train_y = torch.stack([rrn_utils.encode_output(p)
                       for p in train_outputs]).cuda(device)
# One encoded tensor pair per named evaluation dataset.
other_x = {}
other_y = {}
for k in other_inputs:
    other_x[k] = torch.stack(
        [rrn_utils.encode_input(p) for p in other_inputs[k]]).cuda(device)
    other_y[k] = torch.stack(
        [rrn_utils.encode_output(p) for p in other_outputs[k]]).cuda(device)

# model = EmbedRRN(dim_x=dim_x, dim_y=dim_y, embed_size=embed_size, hidden_layer_size=hidden_layer_size).cuda(device)
model = RRN(dim_x=dim_x, dim_y=dim_y, embed_size=embed_size,
            hidden_layer_size=hidden_layer_size).cuda(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

# ones = torch.ones(10, 16).cuda(device)


def closure():
    # One accumulated-gradient pass over the shuffled training set.
    # NOTE(review): the remainder of this body lies outside the chunk.
    optimizer.zero_grad()
    total_loss = 0
    epoch_accuracies = []
    shuffle_indices = np.arange(len(train_x))
    np.random.shuffle(shuffle_indices)
# NOTE(review): fragment — the enclosing scope is outside this chunk; dataset,
# train_size_per_num_hints, valid_size_per_num_hints, hp and rrn_utils come
# from the missing surroundings, and the trailing for-loop body is cut off.

# Split the dataset at two cut points, yielding train / validation portions.
split_inputs, split_outputs = dataset.split_data([
    train_size_per_num_hints,
    train_size_per_num_hints + valid_size_per_num_hints
])
train_inputs = split_inputs[0]
train_outputs = split_outputs[0]
other_inputs = {'validation': split_inputs[1]}
other_outputs = {'validation': split_outputs[1]}

# model = RelNet(dim_x=hp['dim_x'],
#                dim_y=hp['dim_y'],
#                embed_size=hp['embed_size'],
#                hidden_layer_size=hp['hidden_layer_size']).cuda(hp['device'])
model = RRN(dim_x=hp['dim_x'],
            dim_y=hp['dim_y'],
            embed_size=hp['embed_size'],
            hidden_layer_size=hp['hidden_layer_size']).cuda(hp['device'])
optimizer = optim.Adam(model.parameters(),
                       lr=hp['learning_rate'],
                       weight_decay=hp['weight_decay'])

# Encode the training split and move it to the configured device.
train_x = torch.stack([rrn_utils.encode_input(p)
                       for p in train_inputs]).cuda(hp['device'])
# train_x = utils.one_hot_encode(train_x)
train_y = torch.stack([rrn_utils.encode_output(p)
                       for p in train_outputs]).cuda(hp['device'])
other_x = {}
other_y = {}
for k in other_inputs:
def train(config):
    """Train an RNN/LSTM/RRN on the palindrome task described by config.

    :param config: namespace-like object with model_type, device,
        input_length, input_dim, num_hidden, num_classes, batch_size,
        learning_rate, train_steps, max_norm, print_every and experiment.
    :return: a stats dict when config.experiment is truthy, otherwise None.
    """
    # print parameters
    print_config(config)
    config.model_type = config.model_type.lower()
    assert config.model_type in ('rnn', 'lstm', 'rrn')

    # Initialize the device which to run the model on
    wanted_device = config.device.lower()
    if wanted_device == 'cuda':
        # check if cuda is available
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    else:
        # cpu is the standard option
        device = torch.device('cpu')

    # Initialize the model that we are going to use; all three variants share
    # the same constructor signature.
    if config.model_type == 'rnn':
        model = VanillaRNN(seq_length=config.input_length,
                           input_dim=config.input_dim,
                           num_hidden=config.num_hidden,
                           num_classes=config.num_classes,
                           batch_size=config.batch_size,
                           device=device)
    elif config.model_type == 'lstm':
        model = LSTM(seq_length=config.input_length,
                     input_dim=config.input_dim,
                     num_hidden=config.num_hidden,
                     num_classes=config.num_classes,
                     batch_size=config.batch_size,
                     device=device)
    elif config.model_type == 'rrn':
        model = RRN(seq_length=config.input_length,
                    input_dim=config.input_dim,
                    num_hidden=config.num_hidden,
                    num_classes=config.num_classes,
                    batch_size=config.batch_size,
                    device=device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=0)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate)

    # keep stats; one accuracy slot per training step.
    train_acc = np.zeros(config.train_steps + 1)
    first_best_acc = 0
    acc_MA = 0

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # batches to torch tensors
        x = torch.tensor(batch_inputs, dtype=torch.float, device=device)
        y_true = torch.tensor(batch_targets, dtype=torch.long, device=device)

        # Forward pass
        y_pred = model.forward(x)
        loss = criterion(y_pred, y_true)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        # clip_grad_norm() is a method to avoid exploding gradients. It clips
        # gradients above max_norm to max_norm.
        # Deprecated, use clip_grad_norm_() instead
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        ############################################################################

        optimizer.step()
        train_acc[step] = accuracy(y_pred, y_true, config)

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size/(float(t2-t1) + 1e-6)

        if step % config.print_every == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size, examples_per_second,
                      train_acc[step], loss
                  ))
            print(f"x: {x[0,:]}, y_pred: {y_pred[0,:].argmax()}, y_true: {y_true[0]}")
            # NOTE(review): 5-step moving average of accuracy, refreshed only
            # every print_every steps, so the break test below can act on a
            # stale value — confirm this is intended. Also note that for
            # step < 4 the negative slice start yields an empty/short window.
            acc_MA = train_acc[step-4:step+1].sum()/5

        # Stop at the step budget or once the moving-average accuracy is perfect.
        if step == config.train_steps or acc_MA == 1.0:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')

    # Save the final model
    torch.save(model, config.model_type + "_model.pt")
    np.save("train_acc_" + config.model_type + str(config.input_length), train_acc)

    if config.experiment:
        # NOTE(review): on an early break the tail of train_acc stays zero,
        # which skews "last acc" and "num steps" — confirm downstream use.
        stats = {}
        stats["last acc"] = train_acc[-1]
        first_best_acc = np.argmax(train_acc)
        stats["best acc"] = train_acc[first_best_acc]
        stats["step best acc"] = first_best_acc
        stats["num steps"] = len(train_acc)
        stats["accs"] = train_acc
        return stats