Exemplo n.º 1
0
    def test_log_reg(self):
        """Compare discomll logistic regression with Orange on the breast-cancer data.

        Both implementations are fitted on their own copy of the training set.
        The test then asserts that the thetas agree, that the per-instance
        probabilities agree within ``atol=1e-5`` and that the predicted labels
        are equal.
        """
        # python tests_classification.py Tests_Classification.test_log_reg
        from discomll.classification import logistic_regression

        orange_train, orange_test = datasets.breastcancer_cont_orange()
        discomll_train, discomll_test = datasets.breastcancer_cont_discomll()

        # Reference model: Orange's learner configured with the Cholesky fitter.
        logreg = Orange.classification.logreg
        learner = logreg.LogRegLearner(fitter=logreg.LogRegFitter_Cholesky)
        classifier = learner(orange_train)
        expected_thetas = classifier.beta

        # Ask Orange for the predicted value and the distribution in one call.
        get_both = Orange.classification.Classifier.GetBoth
        orange_outputs = [
            (label.value, dist.values())
            for label, dist in (classifier(inst, get_both) for inst in orange_test)
        ]
        expected_labels = [label for label, _ in orange_outputs]
        expected_probs = [probs for _, probs in orange_outputs]

        fitmodel_url = logistic_regression.fit(discomll_train, alpha=1e-8, max_iterations=10)
        # Only the records keyed "thetas" are kept from the fitted model output.
        fitted_thetas = [
            value
            for key, value in result_iterator(fitmodel_url["logreg_fitmodel"])
            if key == "thetas"
        ]
        results_url = logistic_regression.predict(discomll_test, fitmodel_url)

        # Each result value is indexed as (prediction, probabilities).
        discomll_outputs = [value for _, value in result_iterator(results_url)]
        actual_labels = [value[0] for value in discomll_outputs]
        actual_probs = [value[1] for value in discomll_outputs]

        self.assertTrue(np.allclose(expected_thetas, fitted_thetas))
        self.assertTrue(np.allclose(expected_probs, actual_probs, atol=1e-5))
        self.assertListEqual(expected_labels, actual_labels)
Exemplo n.º 2
0
    def test_log_reg(self):
        """Check discomll logistic regression against Orange's implementation.

        Uses the breast-cancer datasets provided by ``datasets``. Thetas must
        be close, probabilities must be close within ``atol=1e-5`` and the
        predicted labels must be identical lists.
        """
        # python tests_classification.py Tests_Classification.test_log_reg
        from discomll.classification import logistic_regression

        orange_train, orange_test = datasets.breastcancer_cont_orange()
        discomll_train, discomll_test = datasets.breastcancer_cont_discomll()

        # Orange side: learner with the Cholesky fitter supplies the reference values.
        orange_logreg = Orange.classification.logreg
        learner = orange_logreg.LogRegLearner(fitter=orange_logreg.LogRegFitter_Cholesky)
        classifier = learner(orange_train)
        reference_thetas = classifier.beta

        return_both = Orange.classification.Classifier.GetBoth
        reference_rows = [
            (target.value, probs.values())
            for target, probs in (classifier(inst, return_both) for inst in orange_test)
        ]
        reference_predictions = [row[0] for row in reference_rows]
        reference_probabilities = [row[1] for row in reference_rows]

        # discomll side: fit, read back the "thetas" records, then predict.
        model_url = logistic_regression.fit(discomll_train, alpha=1e-8, max_iterations=10)
        model_records = result_iterator(model_url["logreg_fitmodel"])
        discomll_thetas = [value for key, value in model_records if key == "thetas"]
        predictions_url = logistic_regression.predict(discomll_test, model_url)

        discomll_rows = [value for _, value in result_iterator(predictions_url)]
        discomll_predictions = [row[0] for row in discomll_rows]
        discomll_probabilities = [row[1] for row in discomll_rows]

        self.assertTrue(np.allclose(reference_thetas, discomll_thetas))
        self.assertTrue(np.allclose(reference_probabilities, discomll_probabilities, atol=1e-5))
        self.assertListEqual(reference_predictions, discomll_predictions)
Exemplo n.º 3
0
def log_reg_fit(input_dict):
    """Fit a discomll logistic regression model from a dict of inputs.

    Args:
        input_dict: mapping that must contain the keys ``"dataset"``,
            ``"alpha"`` and ``"itr"``. They are forwarded to
            ``logistic_regression.fit`` as the dataset, ``alpha`` and
            ``max_iterations`` respectively.

    Returns:
        A dict with the single key ``"fitmodel_url"`` holding whatever
        ``logistic_regression.fit`` returned.

    Raises:
        KeyError: if one of the required keys is missing from ``input_dict``.
    """
    from discomll.classification import logistic_regression

    training_data = input_dict["dataset"]
    alpha = input_dict["alpha"]
    iteration_limit = input_dict["itr"]

    # save_results=True is always requested by this wrapper.
    model_url = logistic_regression.fit(
        training_data,
        alpha=alpha,
        max_iterations=iteration_limit,
        save_results=True,
    )
    return {"fitmodel_url": model_url}
Exemplo n.º 4
0
def logreg_fit(input_dict):
    """Run ``logistic_regression.fit`` using parameters taken from ``input_dict``.

    Args:
        input_dict: mapping with the keys ``"dataset"`` (passed positionally),
            ``"alpha"`` (passed as ``alpha``) and ``"itr"`` (passed as
            ``max_iterations``).

    Returns:
        ``{"fitmodel_url": <result of logistic_regression.fit>}``.

    Raises:
        KeyError: if ``input_dict`` lacks one of the keys listed above.
    """
    from discomll.classification import logistic_regression

    # Read the inputs in the same order the fit call consumes them.
    data = input_dict["dataset"]
    fit_options = {
        "alpha": input_dict["alpha"],
        "max_iterations": input_dict["itr"],
        "save_results": True,
    }

    result = {}
    result["fitmodel_url"] = logistic_regression.fit(data, **fit_options)
    return result
Exemplo n.º 5
0
    def test_log_reg_thetas(self):
        """Check that discomll's fitted thetas match Orange's Cholesky fitter on ex4."""
        # python tests_classification.py Tests_Classification.test_log_reg_thetas
        from discomll.classification import logistic_regression

        orange_train = datasets.ex4_orange()
        discomll_train = datasets.ex4_discomll()

        # The element at index 1 of the Orange fitter's result is the reference.
        orange_fit = Orange.classification.logreg.LogRegFitter_Cholesky(orange_train)
        expected_thetas = orange_fit[1]

        # fit() is called with its default settings here.
        fitmodel_url = logistic_regression.fit(discomll_train)
        fitted_thetas = []
        for key, value in result_iterator(fitmodel_url["logreg_fitmodel"]):
            if key == "thetas":
                fitted_thetas.append(value)

        self.assertTrue(np.allclose(expected_thetas, fitted_thetas))
Exemplo n.º 6
0
    def test_log_reg_thetas(self):
        """Compare thetas fitted by discomll with Orange's Cholesky fitter (ex4 data)."""
        # python tests_classification.py Tests_Classification.test_log_reg_thetas
        from discomll.classification import logistic_regression

        orange_data = datasets.ex4_orange()
        discomll_data = datasets.ex4_discomll()

        # Reference thetas are taken from index 1 of the Orange fitter's output.
        reference_thetas = Orange.classification.logreg.LogRegFitter_Cholesky(orange_data)[1]

        model_url = logistic_regression.fit(discomll_data)
        model_records = result_iterator(model_url["logreg_fitmodel"])
        # Keep only the records stored under the "thetas" key.
        discomll_thetas = [value for key, value in model_records if key == "thetas"]

        thetas_match = np.allclose(reference_thetas, discomll_thetas)
        self.assertTrue(thetas_match)
Exemplo n.º 7
0
from discomll import dataset
from discomll.classification import logistic_regression
from discomll.utils import model_view

# Define the training dataset: the "test:ex4" tag with data_type "chunk".
# X_indices selects indices 0 and 1 as inputs, y_index 2 as the target, and
# y_map lists the two label strings.
# range() is used (as in the other discomll examples) instead of the
# Python-2-only xrange().
train = dataset.Data(data_tag=["test:ex4"],
                     data_type="chunk",
                     X_indices=range(0, 2),
                     y_index=2,
                     y_map=["0.0000000e+00", "1.0000000e+00"])

# Fit the model on the training dataset with the default fit settings.
fit_model = logistic_regression.fit(train)

# Render the fitted model and print it.
model = model_view.output_model(fit_model)
# The parenthesised single-argument form prints the same on Python 2 and also
# parses on Python 3, unlike the bare print statement.
print(model)
Exemplo n.º 8
0
from discomll import dataset
from discomll.classification import logistic_regression

# Training split of the ionosphere data: gzip files addressed by first/last URL
# (generate_urls=True). Index 0 is the id, indices 1..34 are the inputs (all
# tagged "c" in X_meta), index 35 is the target with labels "b" and "g".
train = dataset.Data(data_tag=["http://ropot.ijs.si/data/ionosphere/train/xaaaaa.gz",
                               "http://ropot.ijs.si/data/ionosphere/train/xaaabj.gz"],
                     data_type="gzip",
                     generate_urls=True,
                     id_index=0,
                     X_indices=range(1, 35),
                     X_meta=["c" for i in range(1, 35)],
                     y_index=35,
                     delimiter=",",
                     y_map=["b", "g"])

# Test split: same layout as the training split, different URLs.
test = dataset.Data(data_tag=["http://ropot.ijs.si/data/ionosphere/test/xaaaaa.gz",
                              "http://ropot.ijs.si/data/ionosphere/test/xaaabj.gz"],
                    data_type="gzip",
                    generate_urls=True,
                    id_index=0,
                    X_indices=range(1, 35),
                    X_meta=["c" for i in range(1, 35)],
                    y_index=35,
                    delimiter=",",
                    y_map=["b", "g"])

# Fit on the training split, then predict on the test split with that model.
fit_model = logistic_regression.fit(train, max_iterations=18, alpha=1)
predictions = logistic_regression.predict(test, fit_model)
# The parenthesised single-argument form prints the same on Python 2 and also
# parses on Python 3, unlike the bare print statement.
print(predictions)
Exemplo n.º 9
0
# Training split of the sonar data: gzip files addressed by first/last URL
# (generate_urls=True). Index 0 is the id, indices 1..60 are the inputs (all
# tagged "c" in X_meta), index 61 is the target with labels "R" and "M".
train = dataset.Data(
    data_tag=[
        "http://ropot.ijs.si/data/sonar/train/xaaaaa.gz",
        "http://ropot.ijs.si/data/sonar/train/xaaabj.gz",
    ],
    data_type="gzip",
    generate_urls=True,
    X_indices=range(1, 61),
    id_index=0,
    y_index=61,
    X_meta=["c" for i in range(1, 61)],
    y_map=["R", "M"],
    delimiter=",")

# Test split: same layout as the training split, different URLs.
test = dataset.Data(
    data_tag=[
        "http://ropot.ijs.si/data/sonar/test/xaaaaa.gz",
        "http://ropot.ijs.si/data/sonar/test/xaaabj.gz",
    ],
    data_type="gzip",
    generate_urls=True,
    X_indices=range(1, 61),
    id_index=0,
    y_index=61,
    X_meta=["c" for i in range(1, 61)],
    y_map=["R", "M"],
    delimiter=",")

# Fit on the training split, then predict on the test split with that model.
fit_model = logistic_regression.fit(train, max_iterations=10)
predictions = logistic_regression.predict(test, fit_model)
# The parenthesised single-argument form prints the same on Python 2 and also
# parses on Python 3, unlike the bare print statement.
print(predictions)