Пример #1
0
    def test_run(self):
        """Fit a MeanImputation, then verify a second one restored via set_params.

        Part 1 fits an imputer on ``data``, checks its completion flags and
        validates the imputed output with ``helper_impute_result_check``.
        Part 2 builds a second imputer, loads the first one's params into it,
        and checks that it produces the same output and reports the same
        completion flags.
        """
        status_flags = ("_has_finished", "_iterations_done")

        # Part 1: fit on the data and impute it.
        fitted = MeanImputation(hyperparams=hp)
        fitted.set_training_data(inputs=data)
        fitted.fit(timeout=self.enough_time)
        print(fitted.get_params())
        for flag in status_flags:
            self.assertEqual(getattr(fitted, flag), True)

        imputed = fitted.produce(inputs=data, timeout=self.enough_time).value
        self.helper_impute_result_check(data, imputed)

        # Part 2: restore a second imputer through set_params().
        restored = MeanImputation(hyperparams=hp)
        restored.set_params(params=fitted.get_params())
        for flag in status_flags:
            self.assertEqual(getattr(restored, flag), True)

        imputed_again = restored.produce(
            inputs=data,
            timeout=self.enough_time,
        ).value
        # Both imputers must yield the same result.
        outputs_match = imputed_again.equals(imputed)
        self.assertEqual(outputs_match, True)
        for flag in status_flags:
            self.assertEqual(getattr(restored, flag), True)
Пример #2
0
    def test_noMV(self):
        """Test the imputer on a dataset that has no missing values.

        1. ``produce()`` applied to already-imputed output must return an
           equal result (checked with ``equals``).
        2. ``fit()`` and ``get_params()`` are exercised on a fresh imputer
           trained on that already-imputed data.
        """
        imputer = MeanImputation(hyperparams=hp)

        imputer.set_training_data(inputs=data)
        imputer.fit(timeout=self.enough_time)
        result = imputer.produce(inputs=data, timeout=self.enough_time).value
        # 1. check produce(): `result` is the imputed output, so it is
        #    expected to contain no missing value; imputing it again must
        #    leave it unchanged.
        result2 = imputer.produce(inputs=result,
                                  timeout=self.enough_time).value

        self.assertEqual(result.equals(result2), True)

        # 2. check fit() & get_params(): fit a *fresh* imputer on the
        #    no-missing-value dataset. The previous code created `imputer2`
        #    but then re-used the already fitted `imputer`, leaving the new
        #    instance untested.
        imputer2 = MeanImputation(hyperparams=hp)
        imputer2.set_training_data(inputs=result)
        imputer2.fit(timeout=self.enough_time)
        print(imputer2.get_params())
Пример #3
0
# Show the head of the test data (presumably a pandas DataFrame — confirm).
print(testData.head())

# Fit the encoder on the training data, then apply it to both the training
# and the test data.
hp = EncHyperparameter.sample()
enc = Encoder(hyperparams=hp)
enc.set_training_data(inputs=trainData)
enc.fit()
encodedData = enc.produce(inputs=trainData).value
encodedTestData = enc.produce(inputs=testData).value

# Initialize the DSBox imputer
# NOTE: `hp` is rebound here; from this point on it holds the imputer's
# sampled hyperparameters, not the encoder's.
hp = MeanHyperparameter.sample()
imputer = MeanImputation(hyperparams=hp)
imputer.set_training_data(inputs=encodedData)  # unsupervised
imputer.fit(timeout=100)  # give 100 seconds to fit
print("\nParams:")
print(imputer.get_params())

# Build a second imputer and load the first one's params into it through
# set_params() instead of fitting it again.
imputer2 = MeanImputation(hyperparams=hp)
imputer2.set_params(params=imputer.get_params())

# The training data is imputed with the restored imputer.
imputedData = imputer2.produce(inputs=encodedData, timeout=100).value

# Train a bagging classifier on the imputed training data against the
# 'Class' column of trainTargets.
model = BaggingClassifier()
trainedModel = model.fit(imputedData, np.asarray(trainTargets['Class']))

# NOTE(review): the test data is imputed with `imputer` (and without a
# timeout), whereas the training data went through `imputer2` with
# timeout=100. Both carry the same params via set_params(), so the results
# are presumably equivalent — confirm the asymmetry is intentional.
predictedTargets = trainedModel.predict(
    imputer.produce(inputs=encodedTestData).value)

# Append the d3mindex column to the predicted targets
predictedTargets = pd.DataFrame({
    'd3mIndex': d3mIndex['d3mIndex'],