Пример #1
0
def main(graph_type="dependency", epochs=25):
    """Build a graph-based classifier, train it, and print a test report.

    Args:
        graph_type: ``"dependency"`` selects ``DependencyGraph`` and
            ``"similarity"`` selects ``SimilarityGraph``; every other value
            falls back to ``StaticGraph``.
        epochs: Forwarded unchanged to ``trainer.train`` as ``epochs``.
    """
    # Data and logs are located relative to this file, two levels up.
    project_root = os.path.join(os.path.dirname(__file__), "../../")
    corpus = MultiNLIDataset(project_root)

    # Pick the builder class first, then instantiate it once.
    if graph_type == "dependency":
        builder_cls = DependencyGraph
    elif graph_type == "similarity":
        builder_cls = SimilarityGraph
    else:
        builder_cls = StaticGraph
    graph_builder = builder_cls(lang="en")

    trainer = Trainer(graph_builder, project_root, log_dir="classifier")
    trainer.build()

    vocabulary = trainer.preprocessor.vocabulary.get()
    vocab_size = len(vocabulary)
    max_length = 25

    def preprocessor(x):
        # The model receives the "text" and "graph" entries as a pair.
        encoded = trainer.preprocess(x, max_length)
        return encoded["text"], encoded["graph"]

    model = GraphBasedClassifier(vocab_size, max_length, lstm=None)
    model.build(trainer.num_classes, preprocessor)

    # The value returned by train() is not used by this script.
    trainer.train(model.model, epochs=epochs)

    held_out = corpus.test_data()
    predictions = model.predict(held_out["text"])

    report = classification_report(
        held_out["label"],
        predictions,
        target_names=corpus.labels(),
    )
    print(report)
Пример #2
0
def main():
    """Train the baseline ``MergeClassifier`` and print a test report.

    Builds a ``BaselineTrainer``, wires its preprocessing into the model,
    runs training for 25 epochs, and prints a classification report for the
    ``MultiNLIDataset`` test split.
    """
    # Data and logs are located relative to this file, two levels up.
    project_root = os.path.join(os.path.dirname(__file__), "../../")
    corpus = MultiNLIDataset(project_root)

    trainer = BaselineTrainer(project_root, log_dir="classifier_baseline")
    trainer.build()

    max_length = 25
    vocabulary = trainer.preprocessor.vocabulary.get()
    vocab_size = len(vocabulary)

    def preprocessor(x):
        # Only the "text" entry of the preprocessed output feeds this model.
        return trainer.preprocess(x, max_length)["text"]

    model = MergeClassifier(vocab_size)
    model.build(trainer.num_classes, preprocessor)

    # The value returned by train() is not used by this script.
    trainer.train(
        model.model,
        epochs=25,
        sequence_length=max_length,
        representation="GloVe.6B.100d",
    )

    held_out = corpus.test_data()
    predictions = model.predict(held_out["text"])

    report = classification_report(
        held_out["label"],
        predictions,
        target_names=corpus.labels(),
    )
    print(report)
Пример #3
0
def main():
    """Fit a ``TfidfClassifier`` and print a test-split report.

    Fits on the "text" and "label" entries of the ``MultiNLIDataset``
    training data, predicts on the test data's "text" entry, and prints the
    resulting classification report.
    """
    # Data is located relative to this file, two levels up.
    project_root = os.path.join(os.path.dirname(__file__), "../../")
    corpus = MultiNLIDataset(project_root)
    model = TfidfClassifier()

    training = corpus.train_data()
    features = training["text"]
    targets = training["label"]
    # The value returned by fit() is not used by this script.
    model.fit(features, targets)

    held_out = corpus.test_data()
    predictions = model.predict(held_out["text"])

    report = classification_report(
        held_out["label"],
        predictions,
        target_names=corpus.labels(),
    )
    print(report)
Пример #4
0
 def num_classes(self):
     """Return the number of entries in ``MultiNLIDataset.labels()``."""
     label_names = MultiNLIDataset.labels()
     return len(label_names)