Example no. 1
def train_output_self(model, epochs, training_data, c2i):
    """
    Train model for the specified number of epochs, over the provided training data.
    
    Make sure to shuffle the training data at the beginning of each epoch!
    """
    opt = torch.optim.Adam(model.parameters())
    loss_func = torch.nn.NLLLoss()  # NLLLoss, since our model emits log probabilities on the output side

    for i in range(epochs):
        # Shuffle the training data at the start of each epoch.
        random.shuffle(training_data)

        for idx, word in enumerate(training_data):

            opt.zero_grad()
            # Encode the word as a tensor of character indices.
            word_tens = vocab.sentence_to_tensor(word, c2i, True)
            # Next-character prediction: the input is everything but the last
            # symbol, and the target is the same sequence shifted left by one.
            x_tens = word_tens[:, :-1]
            y_tens = word_tens[:, 1:]

            y_hat, _ = model(x_tens, model.init_hidden())

            loss = loss_func(y_hat.squeeze(), y_tens.squeeze())

            if idx % 5000 == 0:
                print(f"{idx}/{len(training_data)}, loss: {loss.item()}")

            # backpropagate the loss:
            loss.backward()
            # now, tell the optimizer to update our weights:
            opt.step()

    return model
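For reference, here is a minimal, self-contained sketch of the next-character setup that train_output_self assumes: the input is the encoded word minus its final symbol, the target is the same sequence shifted left by one, and NLLLoss is applied to the model's log-probabilities. The toy character vocabulary and GRU below are hypothetical stand-ins for the vocab helper and the model used above, which are not shown in the snippet.

import torch
import torch.nn as nn

# Toy character vocabulary and one encoded word (stand-in for vocab.sentence_to_tensor).
c2i = {"<s>": 0, "</s>": 1, "c": 2, "a": 3, "t": 4}
word = ["<s>", "c", "a", "t", "</s>"]
word_tens = torch.tensor([[c2i[ch] for ch in word]])  # shape: (1, seq_len)

x_tens = word_tens[:, :-1]  # input:  <s> c a t
y_tens = word_tens[:, 1:]   # target: c a t </s>

# Stand-in model: embedding -> GRU -> log-probabilities over the vocabulary.
emb = nn.Embedding(len(c2i), 8)
rnn = nn.GRU(8, 16, batch_first=True)
head = nn.Sequential(nn.Linear(16, len(c2i)), nn.LogSoftmax(dim=-1))

hidden = torch.zeros(1, 1, 16)          # what model.init_hidden() would return here
states, _ = rnn(emb(x_tens), hidden)
y_hat = head(states)                    # shape: (1, seq_len - 1, vocab_size)

loss = nn.NLLLoss()(y_hat.squeeze(), y_tens.squeeze())  # same loss call as the trainer
print(loss.item())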
Example no. 2
def eval_acc(model, test_data, c2i, i2c, o2i, i2o):
    """
    Compute classification accuracy for the test_data against the model.
    
    :param model: The trained model to use
    :param test_data: A DataFrame with input columns 'X1' and 'X2' and a gold-label column 'y'.
    :returns: The classification accuracy (n_correct / n_total), as well as the predictions
    :rtype: tuple(float, list(str))
    """
    in_col_1 = 'X1'
    in_col_2 = 'X2'
    out_col = 'y'

    X1_tensor_seq = test_data.loc[:, in_col_1].apply(
        lambda x: vocab.sentence_to_tensor(x, c2i, True))
    X2_tensor_seq = test_data.loc[:, in_col_2].apply(
        lambda x: vocab.sentence_to_tensor(x, c2i, False))

    y_int_seq = test_data.loc[:, out_col].apply(lambda x: o2i[x])

    correct = []
    labs = []
    data = zip(X1_tensor_seq, X2_tensor_seq, y_int_seq)
    with torch.no_grad():
        for X1, X2, y in data:
            # Concatenate the two encoded inputs into one sequence for the model.
            X = torch.cat((X2, X1), 1)

            out = model(X, model.init_hidden())[0]
            # The predicted label is the index of the highest-scoring output.
            i = torch.argmax(out)

            labs.append(i2o[i.item()])
            correct.append(i.item() == y)

    n_correct = sum(1 for entry in correct if entry)
    return (n_correct / len(correct), labs)
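To make the argmax-and-count pattern concrete without the trained model or the vocab module, here is a self-contained mini-demo with made-up log-probability outputs; the column names and label mapping mirror what eval_acc expects, but everything below is illustrative only.

import torch
import pandas as pd

i2o = {0: "no", 1: "yes"}
test_data = pd.DataFrame({"X1": ["ab", "cd"], "X2": ["ef", "gh"], "y": ["yes", "no"]})

# Made-up model outputs: one row of log-probabilities per test example.
outputs = torch.log(torch.tensor([[0.2, 0.8], [0.9, 0.1]]))

preds = [i2o[torch.argmax(row).item()] for row in outputs]
acc = sum(p == g for p, g in zip(preds, test_data["y"])) / len(preds)
print(acc, preds)  # 1.0 ['yes', 'no']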
Example no. 3
def train_output_y2(model, epochs, training_inputs_1, training_inputs_2, c2i,
                    training_outputs, o2i):
    """
    Train model for the specified number of epochs, over the provided training data.
    
    Make sure to shuffle the training data at the beginning of each epoch!
    """
    opt = torch.optim.Adam(model.parameters())
    loss_func = torch.nn.NLLLoss()  # NLLLoss, since our model emits log probabilities on the output side

    for i in range(epochs):
        # Shuffle the three parallel sequences in unison so examples stay aligned.
        training_inputs_1, training_inputs_2, training_outputs = shuffle(
            training_inputs_1, training_inputs_2, training_outputs)

        data = zip(training_inputs_1, training_inputs_2, training_outputs)

        for idx, (t1, t2, output) in enumerate(data):

            opt.zero_grad()

            # Encode both input strings and the gold output label.
            t1_tens = vocab.sentence_to_tensor(t1, c2i, True)
            t2_tens = vocab.sentence_to_tensor(t2, c2i, False)
            y_tens = vocab.sentence_to_tensor([output], vocab=o2i)

            y_hat, _ = model(t1_tens, t2_tens, model.init_hidden())
            loss = loss_func(y_hat, y_tens[0])

            if idx % 1000 == 0:
                print(f"{idx}/{len(training_inputs_1)}, loss: {loss.item()}")

            # backpropagate the loss:
            loss.backward()
            # now, tell the optimizer to update our weights:
            opt.step()

    return model
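Note that train_output_y2 relies on a shuffle helper that reorders all three parallel lists with the same permutation (possibly sklearn.utils.shuffle; the import is not shown in the snippet). A minimal stand-in, just to illustrate the behavior the trainer depends on:

import random

def shuffle(*seqs):
    # Apply one common permutation to every sequence so x1[i], x2[i], y[i] stay paired.
    order = list(range(len(seqs[0])))
    random.shuffle(order)
    return tuple([s[i] for i in order] for s in seqs)

x1 = ["ab", "cd", "ef"]
x2 = ["gh", "ij", "kl"]
y = ["yes", "no", "yes"]
x1, x2, y = shuffle(x1, x2, y)
print(x1, x2, y)  # the same (x1, x2, y) triples, in one new common order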
Example no. 4
def compute_prob(model, sentence, c2i):
    """
    Compute the negative log probability of the sentence, i.e. -log p(sentence).
    
    Equivalent to equation 3.3 in Jurafsky & Martin.
    """

    nll = nn.NLLLoss(reduction='sum')  # sum the per-character losses rather than averaging them

    with torch.no_grad():
        s_tens = vocab.sentence_to_tensor(sentence, c2i, True)
        x = s_tens[:, :-1]  # input: every symbol but the last
        y = s_tens[:, 1:]   # target: the same sequence shifted left by one
        y_hat, _ = model(x, model.init_hidden())
        # squeeze() drops the leading batch dimension of each tensor
        return nll(y_hat.squeeze(), y.squeeze().long()).item()
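As a sanity check on the quantity compute_prob returns: with reduction='sum', NLLLoss over per-character log-probabilities is exactly -sum_t log p(c_t | c_1..c_{t-1}), which by the chain rule (the quantity behind Jurafsky & Martin's eq. 3.3) is -log p(sentence). A small self-contained illustration with made-up log-probabilities:

import torch
import torch.nn as nn

# One row of (made-up) log-probabilities per predicted character.
log_probs = torch.log(torch.tensor([[0.7, 0.2, 0.1],
                                    [0.1, 0.8, 0.1],
                                    [0.3, 0.3, 0.4]]))
targets = torch.tensor([0, 1, 2])  # the characters that actually occurred

nll_sum = nn.NLLLoss(reduction='sum')(log_probs, targets)
manual = -(log_probs[0, 0] + log_probs[1, 1] + log_probs[2, 2])
print(nll_sum.item(), manual.item())  # identical values: -log p(sentence)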