Пример #1
0
    def test_fit_predict(self):
        """
        Smoke-test the fit / predict / predict_proba cycle.

        Fitting on ten copies of a single sentence pair must not raise,
        every prediction must be string-like, and every probability entry
        must be a dict.
        """
        sentence_pair = [
            "Transformers was a terrible movie but a great model",
            "Transformers are a great model but a terrible movie"
        ]

        model = Comparison(**self.default_config())
        n_samples = 10
        train_pairs = [sentence_pair] * n_samples
        train_labels = ['yes'] * n_samples
        model.fit(train_pairs, train_labels)

        # Use a separate list object so the inference input never aliases
        # the training data.
        test_data = [list(sentence_pair)]

        for label in model.predict(test_data):
            self.assertIsInstance(label, (str, bytes))

        for class_probs in model.predict_proba(test_data):
            self.assertIsInstance(class_probs, dict)
Пример #2
0
    def test_fit_predict(self):
        """
        Smoke-test training and inference using two parallel text lists.

        Fitting must complete without raising and every prediction must be
        string-like.
        """
        model = Comparison(**self.default_config())
        n_samples = 10

        first_texts = ["Indico is the best"] * n_samples
        second_texts = ["Indico is the bestestestest"] * n_samples
        labels = ['yes'] * n_samples
        model.fit(first_texts, second_texts, labels)

        predictions = model.predict(
            ["Is indico the best?"],
            ["Indico is the bestestestest"],
        )
        for label in predictions:
            self.assertIsInstance(label, (str, bytes))
Пример #3
0
 def test_comparison_auxiliary(self):
     """
     Ensure model training with auxiliary context does not error out
     Ensure prediction with context returns one result per input pair
     """
     model = Comparison(**self.default_config(
         chunk_long_sequences=False, max_length=50, batch_size=4))
     trainX = [['i like apples', 'i like apples']] * 4
     trainY = ['A', 'B', 'C', 'D']
     # Every ordered combination of the first two fixture contexts, giving
     # one context pair per training example.
     train_context = [[self.train_context[i], self.train_context[j]]
                      for i in [0, 1] for j in [0, 1]]
     model.fit(trainX, trainY, context=train_context)
     preds = model.predict(trainX, context=train_context)
     # Check the output rather than discarding it, so the test actually
     # verifies something about inference.
     self.assertEqual(len(preds), len(trainX))
Пример #4
0
    def __init__(self, filename=None, **kwargs):
        """
        Set up the dataset, falling back to DATA_PATH when no filename
        (or a falsy one) is given. All other keyword arguments are passed
        to the parent constructor unchanged.
        """
        resolved = filename if filename else DATA_PATH
        super().__init__(filename=resolved, **kwargs)

    @property
    def md5(self):
        """Return the module-level CHECKSUM constant."""
        return CHECKSUM

    def download(self):
        """
        Download the Quora duplicate-questions dataset.

        Creates the parent directory of ``self.filename`` if needed, then
        passes the source URL and column names to ``comparison_download``.
        """
        Path(self.filename).parent.mkdir(parents=True, exist_ok=True)

        # Column names as they appear in the upstream file.
        source_columns = dict(
            text_column1="question1",
            text_column2="question2",
            target_column="is_duplicate",
        )
        comparison_download(
            url="http://qim.ec.quoracdn.net/quora_duplicate_questions.tsv",
            **source_columns,
            filename=QUORA_SIMILARITY
        )

if __name__ == "__main__":
    # Train and evaluate on the Quora duplicate-questions data (nrows=5000).
    dataset = QuoraDuplicate(nrows=5000).dataframe
    model = Comparison(verbose=True, n_epochs=3)

    # Hold out 30% of the rows; the fixed seed keeps the split repeatable.
    splits = train_test_split(
        dataset.Text1,
        dataset.Text2,
        dataset.Target,
        test_size=0.3,
        random_state=42,
    )
    trainX1, testX1, trainX2, testX2, trainY, testY = splits

    model.fit(trainX1, trainX2, trainY)
    predictions = model.predict(testX1, testX2)
    accuracy = np.mean(predictions == testY)
    class_balance = np.mean(testY)
    report = 'Test Accuracy: {:0.2f} for a {:0.2f} class balance'
    print(report.format(accuracy, class_balance))
Пример #5
0
        return CHECKSUM

    def download(self):
        """
        Download the Quora duplicate-questions dataset.

        Creates the parent directory of ``self.filename`` if needed, then
        passes the source URL and column names to ``comparison_download``.
        """
        target_dir = Path(self.filename).parent
        target_dir.mkdir(parents=True, exist_ok=True)

        request = {
            "url": "https://s3.amazonaws.com/enso-data/Quora.csv",
            "text_column1": "Text1",
            "text_column2": "Text2",
            "target_column": "Target",
            "filename": QUORA_SIMILARITY,
        }
        comparison_download(**request)


if __name__ == "__main__":
    # Train and evaluate on the Quora duplicate-questions data (nrows=5000).
    dataset = QuoraDuplicate(nrows=5000).dataframe
    model = Comparison(n_epochs=1)

    # Split the raw arrays, holding out 30% with a fixed seed.
    columns = (
        dataset.Text1.values,
        dataset.Text2.values,
        dataset.Target.values,
    )
    trainX1, testX1, trainX2, testX2, trainY, testY = train_test_split(
        *columns, test_size=0.3, random_state=42)

    # The model is given each example as a (text1, text2) pair.
    train_pairs = list(zip(trainX1, trainX2))
    model.fit(train_pairs, trainY)

    test_pairs = list(zip(testX1, testX2))
    predictions = model.predict(test_pairs)
    accuracy = np.mean(predictions == testY)
    class_balance = np.mean(testY)
    report = 'Test Accuracy: {:0.2f} for a {:0.2f} class balance'
    print(report.format(accuracy, class_balance))
Пример #6
0
    def __init__(self, filename=None, **kwargs):
        """
        Set up the dataset, using DATA_PATH when the caller gives no
        filename (or a falsy one). Extra keyword arguments go straight to
        the parent constructor.
        """
        if not filename:
            filename = DATA_PATH
        super().__init__(filename=filename, **kwargs)

    @property
    def md5(self):
        """Return the module-level CHECKSUM constant."""
        return CHECKSUM

    def download(self):
        """
        Download the Quora duplicate-questions dataset.

        Creates the parent directory of ``self.filename`` if needed, then
        passes the source URL and column names to ``comparison_download``.
        """
        destination_dir = Path(self.filename).parent
        destination_dir.mkdir(parents=True, exist_ok=True)

        request = {
            "url": "http://qim.ec.quoracdn.net/quora_duplicate_questions.tsv",
            "text_column1": "question1",
            "text_column2": "question2",
            "target_column": "is_duplicate",
            "filename": QUORA_SIMILARITY,
        }
        comparison_download(**request)

if __name__ == "__main__":
    # Train and evaluate on the Quora duplicate-questions data (nrows=5000).
    dataset = QuoraDuplicate(nrows=5000).dataframe
    model = Comparison(verbose=True, n_epochs=1)

    # Hold out 30% of the rows; the fixed seed keeps the split repeatable.
    splits = train_test_split(
        dataset.Text1,
        dataset.Text2,
        dataset.Target,
        test_size=0.3,
        random_state=42,
    )
    trainX1, testX1, trainX2, testX2, trainY, testY = splits

    # The model is given each example as a (text1, text2) pair.
    train_pairs = list(zip(trainX1, trainX2))
    model.fit(train_pairs, trainY)

    test_pairs = list(zip(testX1, testX2))
    predictions = model.predict(test_pairs)
    accuracy = np.mean(predictions == testY)
    class_balance = np.mean(testY)
    report = 'Test Accuracy: {:0.2f} for a {:0.2f} class balance'
    print(report.format(accuracy, class_balance))