Пример #1
0
def test_linear_train(monkeypatch: Any) -> None:
    """Run the training loop end-to-end for the linear GloVe model."""
    set_seed(1000)
    # Silence wandb logging during the test run.
    monkeypatch.setattr(wandb, 'log', _noop)

    data = read_test_data()
    training_set = mltd.MeldLinearTextDataset(data, mode='emotion')
    # The dev split reuses the training vocabulary so token ids line up.
    validation_set = mltd.MeldLinearTextDataset(data,
                                                mode='emotion',
                                                vocab=training_set.vocab)

    model = glove_simple(
        glove_path=StringIO(tm.glove_str),
        glove_dim=tm.glove_dim,
        num_classes=tm.num_classes,
        vocab=training_set.vocab,
    )

    training_loader = mltd.meld_linear_text_daloader(
        dataset=training_set,
        batch_size=tm.batch_size,
    )
    validation_loader = mltd.meld_linear_text_daloader(
        dataset=validation_set,
        batch_size=tm.batch_size,
    )

    train(model=model, trainloader=training_loader, devloader=validation_loader)
Пример #2
0
def test_train_weights(monkeypatch: Any) -> None:
    """Run the training loop with per-class weights for the loss function."""
    set_seed(1000)
    class_weights = torch.tensor([4.0, 15.0, 15.0, 3.0, 1.0, 6.0, 3.0])

    # Silence wandb logging during the test run.
    monkeypatch.setattr(wandb, 'log', _noop)

    data = read_test_data()
    training_set = mltd.MeldLinearTextDataset(data, mode='emotion')
    # The dev split reuses the training vocabulary so token ids line up.
    validation_set = mltd.MeldLinearTextDataset(data,
                                                mode='emotion',
                                                vocab=training_set.vocab)

    model = glove_simple(
        glove_path=StringIO(tm.glove_str),
        glove_dim=tm.glove_dim,
        num_classes=tm.num_classes,
        vocab=training_set.vocab,
    )

    training_loader = mltd.meld_linear_text_daloader(
        dataset=training_set,
        batch_size=tm.batch_size,
    )
    validation_loader = mltd.meld_linear_text_daloader(
        dataset=validation_set,
        batch_size=tm.batch_size,
    )

    train(model=model,
          trainloader=training_loader,
          devloader=validation_loader,
          weights=class_weights)
Пример #3
0
def load_mltd(args: argparse.Namespace) -> Dataloaders:
    """Build train (and optional dev) dataloaders for a MeldLinearText dataset."""
    train_data = mltd.MeldLinearTextDataset(data=args.train_data, mode=args.mode)

    # Dev loader is optional: only built when a dev data path was given.
    devloader = None
    if args.dev_data:
        dev_data = mltd.MeldLinearTextDataset(
            data=args.dev_data,
            mode=args.mode,
            # Share the training vocabulary so dev tokens map consistently.
            vocab=train_data.vocab,
        )
        devloader = mltd.meld_linear_text_daloader(
            dataset=dev_data,
            batch_size=args.batch_size,
        )

    trainloader = mltd.meld_linear_text_daloader(
        dataset=train_data,
        batch_size=args.batch_size,
    )
    return Dataloaders(trainloader=trainloader, devloader=devloader)
Пример #4
0
def test_random_simple() -> None:
    """Check the random-embedding classifier yields correctly shaped output."""
    dataset = MeldLinearTextDataset(read_test_data(), mode='emotion')

    model = random_emb_simple(
        vocab_size=dataset.vocab_size(),
        embedding_dim=embedding_dim,
        num_classes=num_classes,
    )

    batches = meld_linear_text_daloader(
        dataset=dataset,
        batch_size=batch_size,
    )
    for batch in batches:
        scores, _ = model(batch.tokens, batch.labels)
        # One row of class scores per utterance in the batch.
        assert scores.shape == (batch_size, num_classes)
Пример #5
0
def test_linear_dataloader() -> None:
    """Check batch contents and padding produced by the linear dataloader."""
    dataset = MeldLinearTextDataset(read_test_data())
    loader = meld_linear_text_daloader(
        dataset=dataset,
        batch_size=3,
    )
    # Token lengths of the three synthetic utterances and the padding target.
    lengths = [len(test_tokens[i]) for i in range(3)]
    longest = max(lengths)

    for batch in loader:
        assert batch.dialogue_ids.equal(torch.tensor([1, 0, 0]))
        assert batch.utterance_ids.equal(torch.tensor([0, 0, 1]))
        assert batch.labels.equal(torch.tensor([1, 2, 2]))
        assert batch.lengths.equal(
            torch.tensor([lengths[2], lengths[0], lengths[1]]))
        # Every sequence is padded out to the longest one in the batch.
        assert all(len(seq) == longest for seq in batch.tokens)
Пример #6
0
def test_linear_rnn() -> None:
    """Test that the linear GloVe+LSTM model runs on synthetic data."""
    dataset = MeldLinearTextDataset(read_test_data(), mode='emotion')
    embeddings = StringIO(glove_str)

    model = glove_linear_lstm(
        glove_path=embeddings,
        glove_dim=glove_dim,
        num_classes=num_classes,
        vocab=dataset.vocab,
    )

    batches = meld_linear_text_daloader(
        dataset=dataset,
        batch_size=batch_size,
    )
    for batch in batches:
        scores, _ = model(batch.tokens, batch.labels)
        # One row of class scores per utterance in the batch.
        assert scores.shape == (batch_size, num_classes)
Пример #7
0
def test_linear_cnn_rnn() -> None:
    """Test if Linear Cnn+Rnn model works with synthetic data."""
    df = read_test_data()
    dataset = MeldLinearTextDataset(df, mode='emotion')
    glove_file = StringIO(glove_str)

    # BUG FIX: the loader was built with a hard-coded batch_size=3 while the
    # shape assertion below checks against the shared `batch_size` constant
    # (as every sibling test does) — the test breaks whenever batch_size != 3.
    loader = meld_linear_text_daloader(
        dataset=dataset,
        batch_size=batch_size,
    )

    classifier = glove_linear_cnn_lstm(
        glove_path=glove_file,
        glove_dim=glove_dim,
        num_classes=num_classes,
        vocab=dataset.vocab,
        filters=[3, 5],      # convolution kernel widths
        out_channels=3,      # feature maps per kernel width
    )

    for batch in loader:
        predictions, _ = classifier(batch.tokens, batch.labels)
        # One row of class scores per utterance in the batch.
        assert predictions.shape == (batch_size, num_classes)