# Exemplo n.º 1
    # Build the index structures (token / POS vocabularies) over the corpus.
    indexer.index_dataset(sentences)

    # Hold out the last 1000 sentences as the dev split.
    dev_sentences = sentences[-1000:]
    train_sentences = sentences[:-1000]

    def _make_loader(split, sampler_factory):
        # Wrap a sentence split into a batched DataLoader using the
        # dataset's own collate function.
        dataset = RuPosDataset(split, indexer, device)
        return DataLoader(dataset,
                          batch_size=256,
                          sampler=sampler_factory(dataset),
                          collate_fn=dataset.collate_fn)

    # Training batches are shuffled; dev batches keep corpus order.
    train_iterator = _make_loader(train_sentences, RandomSampler)
    dev_iterator = _make_loader(dev_sentences, SequentialSampler)

    # Initialize the tagger's embedding layer from pretrained vectors
    # (presumably fastText cc.ru.300 — path is hard-coded below).
    embeddings = load_embeddings(indexer.token_vocab, 'data/cc.ru.300.vec')
    model = SimpleTagger(output_dim=len(indexer.pos_vocab),
                         embedding_matrix=embeddings)
    model.to(device)

    trainer = Trainer(model, train_iterator, dev_iterator)

    # Alternate one training pass and one evaluation pass for 20 epochs.
    for epoch in range(1, 21):
        print('Epoch: %d' % epoch)
        trainer.train_epoch()
        trainer.test_epoch()