Example #1
# Shared imports assumed by the examples below (omitted in the original
# listing); the snippets also rely on module-level `device` and `bi`
# globals and, in Example #3, a `test_batch` helper defined elsewhere.
from collections import Counter
from random import shuffle
from time import time

import torch
import torch.nn as nn
import torch.optim as optim

import rnn_model_batch
import utils
def train(samples,
          labels,
          embeddings,
          n_epochs=2,
          vocab_size=-1,
          lr=0.01,
          use_pretrained_wv=False,
          n_classes=2,
          pad_id=0,
          batch_size=1):
    # Record each sample's true length before padding; keep (len, sample, label)
    # triples together so batches can be sorted longest-first later.
    lens = [len(s) for s in samples]
    lens_samples_labels = list(zip(lens, samples, labels))
    # `device` and `bi` are module-level globals in this snippet.
    model = rnn_model_batch.rnn_model(device,
                                      word_embedding=embeddings,
                                      n_classes=n_classes,
                                      vocab_size=vocab_size,
                                      use_pretrained_wv=use_pretrained_wv,
                                      bi=bi)
    model = model.cuda()
    print(f'created RNN (GRU) model: {model}')
    # criterion = nn.NLLLoss()
    criterion = nn.CrossEntropyLoss()
    # optimizer = optim.SGD(model.parameters(), lr=lr)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.train()
    for e in range(1, n_epochs + 1):
        t0 = time()
        losses = []
        shuffle(lens_samples_labels)
        for batch_lens_samples_labels in utils.get_batch(
                lens_samples_labels, batch_size=batch_size):
            # Sort the batch longest-first (as packed sequences require);
            # the longest sentence defines the padded length for this batch.
            batch_lens_samples_labels = sorted(batch_lens_samples_labels,
                                               reverse=True)
            batch_lens, sents, labels = zip(*batch_lens_samples_labels)
            batch_lens = list(batch_lens)
            max_sent_len = batch_lens[0]
            sents = [
                utils.pad_token_ids(s, pad_id, max_len=max_sent_len)
                for s in sents
            ]
            sents = torch.tensor(sents, device=device, dtype=torch.long)
            labels = torch.tensor(labels, device=device, dtype=torch.long)
            optimizer.zero_grad()
            hidden = model.initHidden(batch_size=len(labels), bi=bi)
            logprobs = model(sents, batch_lens, hidden)
            loss = criterion(logprobs, labels)
            losses.append(loss.item())
            loss.backward()
            optimizer.step()

        epoch_loss = sum(losses) / len(losses)
        epoch_time = round(time() - t0, 2)
        print(f'epoch: {e}, time: {epoch_time}, loss: {epoch_loss}')

    return model
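This example depends on two helpers from a `utils` module that the listing does not show. A minimal sketch of plausible implementations, offered only as an assumption for context (the actual code in the source repository may differ):

import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
bi = False  # assumed module-level flag: bidirectional GRU or not

def get_batch(items, batch_size=1):
    # Yield consecutive slices of `items` of length at most `batch_size`.
    for i in range(0, len(items), batch_size):
        yield items[i:i + batch_size]

def pad_token_ids(token_ids, pad_id, max_len):
    # Right-pad (or truncate) a list of token ids to exactly `max_len`.
    return (list(token_ids) + [pad_id] * max_len)[:max_len]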
Example #2
def train(train_loader,
          embeddings,
          n_epochs=2,
          vocab_size=-1,
          lr=0.01,
          use_pretrained_wv=False,
          n_classes=2):
    model = rnn_model_batch.rnn_model(device,
                                      word_embedding=embeddings,
                                      n_classes=n_classes,
                                      vocab_size=vocab_size,
                                      use_pretrained_wv=use_pretrained_wv,
                                      bi=bi)
    model = model.cuda()
    print(f'created RNN (GRU) model: {model}')
    # criterion = nn.NLLLoss()
    criterion = nn.CrossEntropyLoss()
    # optimizer = optim.SGD(model.parameters(), lr=lr)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.train()
    for e in range(1, n_epochs + 1):
        t0 = time()
        losses = []
        # Batches are expected to come off the loader as fixed-length
        # LongTensors already on the model's device; the loop does not
        # pad or move them itself.
        for sents, labels in train_loader:
            optimizer.zero_grad()
            hidden = model.initHidden(batch_size=len(labels), bi=bi)
            logprobs = model(sents, hidden, is_batch=True)
            loss = criterion(logprobs, labels)
            losses.append(loss.item())
            loss.backward()
            optimizer.step()

        epoch_loss = sum(losses) / len(losses)
        epoch_time = round(time() - t0, 2)
        print(f'epoch: {e}, time: {epoch_time}, loss: {epoch_loss}')

    return model
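A hedged usage sketch for this variant: since the loop neither pads batches nor moves them to the GPU, the `DataLoader` must yield fixed-length `LongTensor` batches that already live on the model's device. The toy data and the `embeddings=None` argument below are illustrative assumptions, not from the source:

import torch
from torch.utils.data import TensorDataset, DataLoader

device = torch.device('cuda')
# Toy corpus: 8 "sentences" of 10 token ids each, with binary labels,
# created directly on the GPU so the loop can consume them unchanged.
toy_sents = torch.randint(1, 100, (8, 10), dtype=torch.long, device=device)
toy_labels = torch.randint(0, 2, (8,), dtype=torch.long, device=device)
train_loader = DataLoader(TensorDataset(toy_sents, toy_labels), batch_size=4)

model = train(train_loader, embeddings=None, n_epochs=2,
              vocab_size=100, use_pretrained_wv=False)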
Example #3
def train(samples,
          labels,
          embeddings,
          n_epochs=2,
          vocab_size=-1,
          lr=0.01,
          use_pretrained_wv=False,
          pad_id=0,
          batch_size=1,
          test_dict=None,
          test_every=0,
          use_wt_loss=True,
          max_len=64):
    lens = [len(s) for s in samples]
    lens_samples_labels = list(zip(lens, samples, labels))
    n_classes = len(set(labels))
    model = rnn_model_batch.rnn_model(device,
                                      word_embedding=embeddings,
                                      n_classes=n_classes,
                                      vocab_size=vocab_size,
                                      use_pretrained_wv=use_pretrained_wv,
                                      bi=bi,
                                      n_layers=1,
                                      max_len=max_len)
    model = model.cuda()
    print(f'created RNN (GRU) model: {model}')
    if use_wt_loss:
        label_freq_counts = Counter(labels)
        print('class freq dist: ', sorted(label_freq_counts.items()))
        # total_labels_count = len(labels)
        # label_probs = {label: count / total_labels_count for label, count in label_freq_counts.items()}
        # wts = [label_probs[label] for label in sorted(label_probs.keys())]
        # Inverse-frequency weights: the rarer a class, the larger its
        # contribution to the loss.
        wts = [
            1. / label_freq_counts[label]
            for label in sorted(label_freq_counts.keys())
        ]
        # wts = [-np.log10(w) for w in wts]
        wts = torch.tensor(wts, device=device, dtype=torch.float)
        print('weights for loss function: ', wts)
        criterion = nn.CrossEntropyLoss(weight=wts, reduction='sum')
    else:
        criterion = nn.CrossEntropyLoss()
    # optimizer = optim.SGD(model.parameters(), lr=lr)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    # optimizer = optim.RMSprop(model.parameters(), lr=lr)

    for e in range(1, n_epochs + 1):
        model.train()  # re-enable training mode (evaluation below may switch to eval)
        t0 = time()
        losses = []
        shuffle(lens_samples_labels)
        for batch_lens_samples_labels in utils.get_batch(
                lens_samples_labels, batch_size=batch_size):
            batch_lens_samples_labels = sorted(batch_lens_samples_labels,
                                               reverse=True)
            batch_lens, sents, labels = zip(*batch_lens_samples_labels)
            batch_lens = list(batch_lens)
            sents = [
                utils.pad_token_ids(s, pad_id, max_len=max_len) for s in sents
            ]
            sents = torch.tensor(sents, device=device, dtype=torch.long)
            labels = torch.tensor(labels, device=device, dtype=torch.long)
            optimizer.zero_grad()
            hidden = model.initHidden(batch_size=len(labels), bi=bi)
            logprobs = model(sents, batch_lens, hidden)
            loss = criterion(logprobs, labels)
            losses.append(loss.item())
            loss.backward()
            # Zero out the gradient for the padding embedding row so the
            # pad vector stays fixed during training (works for any
            # embedding size, unlike a hard-coded torch.zeros(100)).
            model.embedding.weight.grad[pad_id].zero_()
            optimizer.step()

        epoch_loss = sum(losses) / len(losses)
        epoch_time = round(time() - t0, 2)
        print(f'epoch: {e}, time: {epoch_time} sec., loss: {epoch_loss}')
        # Guard against test_every == 0 (the default), which would otherwise
        # raise ZeroDivisionError in the modulo below.
        if test_every and test_dict and e % test_every == 0:
            X_test = test_dict['X_test']
            y_test = test_dict['y_test']
            pad_id = test_dict['pad_id']
            a, f, p, r = test_batch(model,
                                    X_test,
                                    y_test,
                                    pad_id,
                                    batch_size=1,
                                    max_len=max_len)
            print('*' * 80)
            print(
                f'epoch: {e}, on TEST SET acc: {a}, f1: {f}, precision: {p}, recall: {r}'
            )
            print('*' * 80)

    return model
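The inverse-frequency weighting used above makes rare classes contribute proportionally more to the loss. A small self-contained illustration of the same computation:

from collections import Counter

toy_labels = [0] * 90 + [1] * 10               # heavily imbalanced labels
counts = Counter(toy_labels)                   # Counter({0: 90, 1: 10})
wts = [1. / counts[k] for k in sorted(counts)]
print(wts)  # [0.0111..., 0.1]: the rare class is weighted ~9x higher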