示例#1
0
def discretize_ent(infilename,outfilename):
    """
    Discretize a data set with the entropy/MDL method of [#fayyad1993]_.

    The Orange Python module is required to perform the discretization.
    All continuous features are discretized; this is meant for
    classification data sets only.

    :param infilename: name of the input file (expecting an arff file)
    :type infilename: string
    :param outfilename: name of the output file
    :type outfilename: string
    """
    table = OTable(infilename)

    discretizer = Disc()
    discretizer.method = EntropyMDL()
    discretized = discretizer(table)

    # Attributes that were renamed with a "D_" prefix get their original
    # name back, and commas inside their value labels become semicolons.
    for feature in discretized.domain.attributes:
        if not feature.name.startswith("D_"):
            continue
        feature.name = feature.name[2:]
        feature.values = [label.replace(',', ";") for label in feature.values]

    # Write the discretized data to the output file
    discretized.save(outfilename)
示例#2
0
def discretize_ent(infilename, outfilename):
    """
    Run MDL-based discretization ([#fayyad1993]_) on a data file.

    Needs the Orange Python module. Every continuous feature of the
    (classification) data set is discretized and the result is saved.

    :param infilename: name of the input file (expecting an arff file)
    :type infilename: string
    :param outfilename: name of the output file
    :type outfilename: string
    """
    prefix = "D_"

    source = OTable(infilename)
    mdl = Disc()
    mdl.method = EntropyMDL()
    result = mdl(source)

    # Post-process the discretized attributes
    for attribute in result.domain.attributes:
        current = attribute.name
        # Only attributes carrying the prefix are restored and relabelled
        if current[:len(prefix)] == prefix:
            attribute.name = current[len(prefix):]
            attribute.values = [
                value.replace(',', ";") for value in attribute.values
            ]

    # Persist the discretized table
    result.save(outfilename)
示例#3
0
    def test_bayes(self):
        """CA of naive Bayes on its own training data.

        The class is a copy of one randomly chosen binary feature, so the
        accuracy must be 1; after inverting the last 20 labels it must fall
        into [0.75, 1).
        """
        n_rows, n_cols = 100, 5
        features = np.random.randint(2, size=(n_rows, n_cols))
        picked = np.random.randint(n_cols)
        target = features[:, picked].copy().reshape(n_rows, 1)
        table = Table.from_numpy(None, features, target)
        table = Discretize(method=discretize.EqualWidth(n=3))(table)
        learner = NaiveBayesLearner()

        results = TestOnTrainingData()(table, [learner])
        np.testing.assert_almost_equal(CA(results), [1])

        # Invert the class value of the final 20 rows
        table.Y[-20:] = 1 - table.Y[-20:]
        results = TestOnTrainingData()(table, [learner])
        self.assertGreaterEqual(CA(results)[0], 0.75)
        self.assertLess(CA(results)[0], 1)
示例#4
0
class NaiveBayesLearner(Learner):
    """
    Learner for a naive Bayes classifier.

    Only discrete attributes are supported; the default preprocessors
    discretize continuous attributes.

    Parameters
    ----------
    preprocessors : list, optional (default="[Orange.preprocess.Discretize]")
        An ordered list of preprocessors applied to data before training
        or testing.
    """
    name = 'naive bayes'

    preprocessors = [Discretize()]

    def fit_storage(self, table):
        """
        Build a `NaiveBayesModel` from the contingencies of `table`.

        Raises `TypeError` when `table` is not a `Storage` instance and
        `NotImplementedError` when any domain variable is not discrete.
        """
        if not isinstance(table, Storage):
            raise TypeError("Data is not a subclass of Orange.data.Storage.")
        if any(not var.is_discrete for var in table.domain.variables):
            raise NotImplementedError("Only discrete variables are supported.")

        domain = table.domain
        contingencies = contingency.get_contingencies(table)
        class_contingency = contingency.get_contingency(
            table, domain.class_var)
        # The diagonal of the class-by-class contingency is taken as the
        # class frequencies.
        class_freq = np.array(np.diag(class_contingency))
        return NaiveBayesModel(contingencies, class_freq, domain)
示例#5
0
 def test_learner_scorer_previous_transformation(self):
     """Scoring data that was discretized beforehand yields positive scores."""
     scorer = LogisticRegressionLearner()
     from Orange.preprocess import Discretize
     discretized_iris = Discretize()(self.iris)
     feature_scores = scorer.score_data(discretized_iris)
     # every score has to be defined and strictly positive
     all_positive = np.all(feature_scores > 0)
     self.assertTrue(all_positive)
示例#6
0
    def set_data(self, data):
        """
        Store new input data and prepare the widget state for it.

        An `SqlTable` whose approximate length exceeds `LARGE_TABLE` is
        replaced by `data.sample_time(DEFAULT_SAMPLE_TIME)`. The combos are
        re-initialised, and `self.discrete_data` is set to a discretized copy
        (`EqualFreq(n=4)`) when the domain has a continuous attribute, or to
        the data itself otherwise. The colouring controls are enabled
        according to the class variable, and any subset that arrived before
        the data is processed at the end.

        Args:
            data: the input table, or None
        """
        # isinstance rather than type equality, so SqlTable subclasses are
        # sampled as well
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.closeContext()
        self.data = data
        self.init_combos(self.data)
        self.information([0, 1, 2])
        if not self.data:
            self.discrete_data = None
            return
        # TODO: check
        # if data.has_missing_class():
        #     self.information(1, "Examples with missing classes were removed.")
        if any(attr.is_continuous for attr in data.domain):
            self.discrete_data = Discretize(method=EqualFreq(n=4))(data)
        else:
            self.discrete_data = self.data

        if self.data.domain.class_var is None:
            self.rb_colors.setDisabled(True)
            disc_class = False
        else:
            self.rb_colors.setDisabled(False)
            disc_class = self.data.domain.has_discrete_class
            # button 2 and the bar button are only usable with a discrete class
            self.rb_colors.group.button(2).setDisabled(not disc_class)
            self.bar_button.setDisabled(not disc_class)
        self.interior_coloring = bool(disc_class)
        self.openContext(self.data)

        # if we first received subset we now call setSubsetData to process it
        if self.unprocessed_subset_data:
            self.set_subset_data(self.unprocessed_subset_data)
            self.unprocessed_subset_data = None
示例#7
0
    def set_data(self, data):
        """
        Store new input data and prepare the widget state for it.

        An `SqlTable` whose approximate length exceeds `LARGE_TABLE` is
        replaced by `data.sample_time(DEFAULT_SAMPLE_TIME)`. When the domain
        has a continuous attribute, `self.discrete_data` becomes a copy
        discretized with `EqualFreq(n=4)` (class included); otherwise it is
        the data itself, or None when there is no data. Vizrank is reset and
        its button enabled only for data with more than one row and at least
        one attribute.

        Args:
            data: the input table, or None
        """
        # isinstance rather than type equality, so SqlTable subclasses are
        # sampled as well
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.closeContext()
        self.data = data
        self.init_combos(self.data)
        if self.data is None:
            self.discrete_data = None
        elif any(attr.is_continuous for attr in data.domain):
            self.discrete_data = Discretize(
                method=EqualFreq(n=4), discretize_classes=True)(data)
        else:
            self.discrete_data = self.data

        self.vizrank.stop_and_reset()
        self.vizrank_button.setEnabled(
            self.data is not None and len(self.data) > 1
            and len(self.data.domain.attributes) >= 1)

        if self.data is None:
            return

        # Colour by class distribution when a class exists, else by Pearson
        has_class = self.data.domain.class_var is not None
        self.rb_colors.setDisabled(not has_class)
        self.interior_coloring = \
            self.CLASS_DISTRIBUTION if has_class else self.PEARSON

        self.openContext(self.data)

        # if we first received subset we now call setSubsetData to process it
        if self.unprocessed_subset_data:
            self.set_subset_data(self.unprocessed_subset_data)
            self.unprocessed_subset_data = None
 def test_remove_constant(self):
     """With `remove_const=True` the number of attributes changes when a
     column is constant."""
     iris = data.Table("iris")
     iris[:, 0] = 1  # make the first column constant
     preprocessor = Discretize(remove_const=True)
     discretized = preprocessor(iris)
     n_before = len(iris.domain.attributes)
     n_after = len(discretized.domain.attributes)
     self.assertNotEqual(n_before, n_after)
示例#9
0
 def discretizer(data):
     """
     Return `data` with its continuous columns discretized.

     If no variable or meta in the domain is continuous, `data` itself is
     returned. Otherwise everything (classes and metas included) is
     discretized with `EqualFreq(n=4)`, constant columns are kept, and the
     result of `to_dense()` is returned.
     """
     columns = chain(data.domain.variables, data.domain.metas)
     if not any(attr.is_continuous for attr in columns):
         return data
     to_discrete = Discretize(
         method=EqualFreq(n=4), remove_const=False,
         discretize_classes=True, discretize_metas=True)
     return to_discrete(data).to_dense()
class NaiveBayesLearner(Learner):
    """
    Learner for a naive Bayes classifier.

    Only discrete attributes are supported; the default preprocessors
    discretize continuous attributes.

    Parameters
    ----------
    preprocessors : list, optional (default="[Orange.preprocess.Discretize]")
        An ordered list of preprocessors applied to data before training
        or testing.
    """

    preprocessors = [RemoveNaNColumns(), Discretize()]
    name = "naive bayes"

    def fit_storage(self, table):
        """
        Build a `NaiveBayesModel` from add-one smoothed probabilities.

        Raises `TypeError` when `table` is not a `Storage` instance and
        `NotImplementedError` when any domain variable is not discrete.
        """
        if not isinstance(table, Storage):
            raise TypeError("Data is not a subclass of Orange.data.Storage.")
        if any(not var.is_discrete for var in table.domain.variables):
            raise NotImplementedError("Only discrete variables are supported.")

        contingencies = contingency.get_contingencies(table)
        class_contingency = contingency.get_contingency(
            table, table.domain.class_var)
        class_freq = np.array(np.diag(class_contingency))

        # Add-one smoothing of the class distribution
        n_classes = len(class_freq)
        class_prob = (class_freq + 1) / (np.sum(class_freq) + n_classes)

        # For every attribute: log of the smoothed contingency, divided by
        # the class probability of the corresponding row
        log_cont_prob = []
        for c in contingencies:
            counts = np.array(c)
            column_totals = np.sum(counts, axis=0)[None, :]
            smoothed = (counts + 1) / (column_totals + c.shape[0])
            log_cont_prob.append(np.log(smoothed / class_prob[:, None]))
        return NaiveBayesModel(log_cont_prob, class_prob, table.domain)
示例#11
0
	def set_data(self, data):
		"""
		Store the input data and derive its discretized version.

		`self.disc_data` stays None unless the data has at least two rows, a
		non-empty `Y`, and at least two attributes left after constant
		features are removed and the rest is discretized with `EqualFreq()`.
		Warnings / information messages are raised for each failing case.
		"""
		self.closeContext()
		self.clear_messages()
		self.data = data
		self.disc_data = None
		self.selection = []
		if data is None:
			# nothing to prepare without data
			pass
		elif len(data) < 2:
			self.Warning.not_enough_inst()
		elif data.Y.size == 0:
			self.Warning.no_class_var()
		else:
			remove_constant = Remove(Remove.RemoveConstant)
			data = remove_constant(data)
			discretized = Discretize(method=EqualFreq())(data)
			if remove_constant.attr_results["removed"]:
				self.Information.removed_cons_feat()
			if len(discretized.domain.attributes) >= 2:
				self.disc_data = discretized
			else:
				self.Warning.not_enough_vars()
		model_domain = self.disc_data and self.disc_data.domain
		self.feature_model.set_domain(model_domain)
		self.openContext(self.disc_data)
		self.apply()
		self.vizrank.button.setEnabled(self.disc_data is not None)
示例#12
0
 def test_keep_constant(self):
     """With `remove_const=False` the number of attributes is unchanged
     even though one column is constant."""
     iris = data.Table('iris')
     iris[:, 0] = 1  # make the first column constant
     preprocessor = Discretize(remove_const=False)
     discretized = preprocessor(iris)
     n_before = len(iris.domain.attributes)
     n_after = len(discretized.domain.attributes)
     self.assertEqual(n_before, n_after)
示例#13
0
    def test_discretize_metas(self):
        """Metas are discretized only when `discretize_metas=True`.

        The test domain has the fourth iris attribute and the class variable
        as metas. By default both metas must be the very same variable
        objects as in the input; with `discretize_metas=True` the first meta
        becomes a `DiscreteVariable` while the second is still the input
        object.
        """
        table = data.Table('iris')
        domain = table.domain
        regr_domain = data.Domain(domain.attributes[:3], [],
                                  [domain.attributes[3], domain.class_var])
        table = data.Table.from_table(regr_domain, table)

        discretize = Discretize(remove_const=False)
        new_table = discretize(table)
        # Compare against the input domain: comparing new_table's metas with
        # themselves would pass unconditionally.
        self.assertIs(new_table.domain.metas[0], table.domain.metas[0])
        self.assertIs(new_table.domain.metas[1], table.domain.metas[1])

        discretize = Discretize(remove_const=False, discretize_metas=True)
        new_table = discretize(table)
        self.assertIsInstance(new_table.domain.metas[0], DiscreteVariable)
        self.assertIs(new_table.domain.metas[1], table.domain.metas[1])
示例#14
0
文件: owsieve.py 项目: odipus/orange3
    def set_data(self, data):
        """
        Store new input data and rebuild the list of selectable attributes.

        An `SqlTable` whose approximate length exceeds `LARGE_TABLE` is
        replaced by `data.sample_time(DEFAULT_SAMPLE_TIME)`. Continuous
        attributes are discretized with `EqualFreq(n=4)` into
        `self.discrete_data`; `self.attrs` then holds the discretized domain
        followed by the discrete metas of the input. The first two attributes
        (or the only one, twice) are selected by name before the context is
        opened.

        Args:
            data: the input table, or None
        """
        # isinstance rather than type equality, so SqlTable subclasses are
        # sampled as well
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.closeContext()
        self.data = data
        self.areas = []
        if self.data is None:
            self.attrs[:] = []
        else:
            if any(attr.is_continuous for attr in data.domain):
                self.discrete_data = Discretize(method=EqualFreq(n=4))(data)
            else:
                self.discrete_data = self.data
            discrete_metas = (
                var for var in self.data.domain.metas if var.is_discrete)
            self.attrs[:] = list(
                chain(self.discrete_data.domain, discrete_metas))
        if self.attrs:
            self.attrX = self.attrs[0].name
            # index is 1 when there is a second attribute, otherwise 0
            self.attrY = self.attrs[len(self.attrs) > 1].name
        else:
            self.attrX = self.attrY = None
            self.areas = self.selection = None
        self.openContext(self.data)
        self.resolve_shown_attributes()
        self.update_selection()
示例#15
0
def create_contingencies(X, callback=None):
    """
    Count value combinations of neighbouring variables.

    `X` is discretized with `EqualFreq(n=10)` and every discrete value is
    mapped to a number parsed from its label (or to its index when the label
    cannot be parsed). For an `SqlTable` the counting is delegated to
    `create_sql_contingency` and `callback(step, total)` is called after
    each step. Otherwise rows containing NaN are skipped and, for every
    variable, the mapped values in the window of one neighbour on each side
    are counted; the result is a list of `(keys, counts)` array pairs.
    """
    window_size = 1
    dim = len(X.domain)

    X_ = Discretize(method=EqualFreq(n=10))(X)
    m = []
    for var in X_.domain:
        cleaned_values = [
            tuple(part.strip()
                  for part in label.strip('[]()<>=≥').split('-'))
            for label in var.values
        ]
        try:
            float_values = [[float(part) for part in parts]
                            for parts in cleaned_values]
            # NOTE(review): v[0] + (v[1] - v[0]) equals v[1], i.e. the upper
            # edge and not the midpoint of the bin -- confirm this is intended
            bin_centers = {
                idx: v[0] if len(v) == 1 else v[0] + (v[1] - v[0])
                for idx, v in enumerate(float_values)
            }
        except ValueError:
            # labels that are not numeric map every value to its own index
            bin_centers = {idx: idx for idx in range(len(cleaned_values))}
        m.append(bin_centers)

    from Orange.data.sql.table import SqlTable
    if not isinstance(X, SqlTable):
        n_vars = len(X_.domain)
        counters = [defaultdict(float) for _ in range(n_vars)]
        for r in X_:
            if any(np.isnan(r)):
                continue
            row = tuple(m[vi].get(v) for vi, v in enumerate(r))
            for l in range(n_vars):
                start = l - window_size
                if start < 0:
                    start = None
                stop = l + window_size + 1
                if stop > dim:
                    stop = None
                counters[l][row[slice(start, stop)]] += 1
        pairs = [zip(*counter.items()) for counter in counters]
        conts = [(np.array(c), np.array(cw)) for c, cw in pairs]
    else:
        conts = []
        al = len(X.domain)
        if al > 1:
            conts.append(create_sql_contingency(X_, [0, 1], m))
            if callback:
                callback(1, al)
            for a1, a2, a3 in zip(range(al), range(1, al), range(2, al)):
                conts.append(create_sql_contingency(X_, [a1, a2, a3], m))
                if callback:
                    callback(a3, al)
            if al > 2:
                conts.append(create_sql_contingency(X_, [al - 2, al - 1], m))
                if callback:
                    callback(al, al)

    return conts
示例#16
0
 def test_preprocessors_can_be_passed_in_as_generator(self):
     """Generators must be accepted wherever iterables of preprocessors are."""
     expected = (Discretize(), )
     learner = DummyLearnerPP(pp for pp in expected)
     active = tuple(learner.active_preprocessors)
     self.assertEqual(
         active, expected,
         msg='Preprocessors should be able to be passed in as single object '
             'as well as an iterable object')
示例#17
0
 def test_preprocessors_can_be_passed_in_as_non_iterable(self):
     """A single preprocessor instance is accepted in place of a list."""
     single = Discretize()
     learner = DummyLearnerPP(preprocessors=single)
     active = tuple(learner.active_preprocessors)
     self.assertEqual(
         active, (single, ),
         msg='Preprocessors should be able to be passed in as single object '
             'as well as an iterable object')
示例#18
0
 def test_callback(self):
     """The values passed to the callback start at 0, end at 1 and never
     decrease."""
     progress = unittest.mock.Mock()
     preprocessors = [Discretize(), Randomize()]
     learner = DummyLearner(preprocessors=preprocessors)
     learner(Table("iris"), progress)
     # first positional argument of every recorded call
     reported = [call[0][0] for call in progress.call_args_list]
     self.assertEqual(min(reported), 0)
     self.assertEqual(max(reported), 1)
     self.assertListEqual(reported, sorted(reported))
示例#19
0
def predict_wine_quality(table, n):
    """
    Cross-validate a neural-network classifier on discretized data.

    The table is discretized into `n` equal-width intervals, its first
    domain variable is used as the class and the remaining ones as features,
    and the CA and AUC of a 10-fold cross validation are printed.
    """
    # Make the continuous variables discrete
    binning = Discretize()
    binning.method = discretize.EqualWidth(n=n)
    table = binning(table)
    # Define domain: everything but the first variable is a feature
    wine_domain = Domain(list(table.domain[1:]), table.domain[0])
    table = Table.from_table(domain=wine_domain, source=table)
    # Construct learner and print results
    nn_learner = NNClassificationLearner(hidden_layer_sizes=(10, ),
                                         max_iter=4000)
    eval_results = CrossValidation(table, [nn_learner], k=10)
    accuracy = scoring.CA(eval_results)[0]
    print("Accuracy of cross validation: {:.3f}".format(accuracy))
    auc = scoring.AUC(eval_results)[0]
    print("AUC: {:.3f}".format(auc))
示例#20
0
 def test_overrides_custom_preprocessors(self):
     """Preprocessors given to the constructor replace the defaults that
     are defined on the learner."""
     custom = Discretize()
     learner = DummyLearnerPP(preprocessors=(custom, ))
     active = tuple(learner.active_preprocessors)
     self.assertEqual(
         active, (custom, ),
         msg='Learner should override default preprocessors when specified in '
             'constructor')
示例#21
0
    def set_data(self, data):
        """
        Store the input data, discretize it and pick the shown attributes.

        Continuous attributes, classes and metas are discretized with
        `EqualFreq(n=4)`; `self.attrs` is filled with the `Variable` items
        of the domain model. The first two attributes are selected, after
        which the context and `resolve_shown_attributes` get a chance to
        override them. The selection is cleared before the context is opened,
        and the vizrank dialog is initialized without being shown.

        Args:
            data (Table): input data
        """
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.closeContext()
        self.data = data
        self.areas = []
        self.selection = set()
        if self.data is not None:
            self.domain_model.set_domain(data.domain)
            has_continuous = any(
                attr.is_continuous
                for attr in chain(data.domain, data.domain.metas))
            if has_continuous:
                to_discrete = Discretize(method=EqualFreq(n=4),
                                         remove_const=False,
                                         discretize_classes=True,
                                         discretize_metas=True)
                self.discrete_data = to_discrete(data)
            else:
                self.discrete_data = data
        else:
            self.attrs[:] = []
            self.domain_model.set_domain(None)
        self.attrs = [x for x in self.domain_model if isinstance(x, Variable)]
        if not self.attrs:
            self.attr_x = self.attr_y = None
            self.areas = []
            self.selection = set()
        else:
            self.attr_x = self.attrs[0]
            # index is 1 when there is a second attribute, otherwise 0
            self.attr_y = self.attrs[len(self.attrs) > 1]
        self.openContext(self.data)
        self.resolve_shown_attributes()
        self.update_graph()
        self.update_selection()

        self.vizrank.initialize()
        can_rank = (
            self.data is not None and len(self.data) > 1
            and len(self.data.domain.attributes) > 1)
        self.vizrank_button.setEnabled(can_rank)
def formatTable(tble):
    """
    Bin the data, then one-hot encode the binned data.

    :param tble: table to process
    :return: data: tble with binned data,
             X: representation of data with one-hot-encoding,
             mapping: representations of what our one-hot-encoding is
    """
    # Discretization (binning) into 4 equal-width intervals
    # https://docs.orange.biolab.si/3/data-mining-library/reference/preprocess.html
    print("Discretizing data")
    disc = Discretize()
    disc.method = discretize.EqualWidth(n=4)
    data = disc(tble)

    print("One hot encoding data")
    X, mapping = OneHot.encode(data, include_class=True)
    # The mapping is returned exactly as produced by the encoder. The former
    # `sorted(mapping.items())` call discarded its result, so it had no
    # effect on the returned value and has been dropped.

    return data, X, mapping
示例#23
0
    def test_use_default_preprocessors_property(self):
        """Default preprocessors can be requested in addition to the ones
        passed to the constructor; the custom one must come first."""
        learner = DummyLearnerPP(preprocessors=(Discretize(), ))
        learner.use_default_preprocessors = True

        active = list(learner.active_preprocessors)
        self.assertEqual(
            len(active), 2,
            msg='Learner did not properly insert custom preprocessor into '
                'preprocessor list')
        custom, default = active[0], active[1]
        self.assertIsInstance(
            custom, Discretize,
            msg='Custom preprocessor was inserted in incorrect order')
        self.assertIsInstance(default, Randomize)
示例#24
0
 def _get_discrete_data(self, data):
     """
     Return a version of `data` whose continuous columns are discretized.

     None is returned when there is no data, no rows, or no primitive
     (discrete or continuous) variable or meta. Data without continuous
     domain variables is returned unchanged.
     """
     if data is None or not len(data):
         return None
     columns = chain(data.domain.variables, data.domain.metas)
     if not any(attr.is_discrete or attr.is_continuous for attr in columns):
         return None
     # NOTE(review): only domain.variables are checked here although metas
     # are discretized too -- continuous metas alone do not trigger
     # discretization; confirm this is intended
     if not any(attr.is_continuous for attr in data.domain.variables):
         return data
     to_discrete = Discretize(method=EqualFreq(n=4),
                              remove_const=False,
                              discretize_classes=True,
                              discretize_metas=True)
     return to_discrete(data)
示例#25
0
 def setUp(self):
     """Create a 100-row table with 1000 continuous and 1000 discrete
     columns, plus a few derived domains."""
     n_cols, n_rows = 1000, 100
     continuous = [ContinuousVariable(str(i)) for i in range(n_cols)]
     discrete = [
         DiscreteVariable("D" + str(i), values=("1", "2"))
         for i in range(n_cols)
     ]
     self.domain = Domain(continuous + discrete)
     extended = list(self.domain.attributes) + [ContinuousVariable("x")]
     self.domain_x = Domain(extended)
     self.single = Domain([ContinuousVariable("0")])
     # fixed seed, values are 0 or 1
     rng = np.random.RandomState(0)
     values = rng.randint(0, 2, (n_rows, len(self.domain)))
     self.table = Table.from_numpy(self.domain, values)
     discretized = Discretize(EqualFreq(n=3))(self.table)
     self.discretized_domain = discretized.domain
     normalized = Normalize()(self.table)
     self.normalized_domain = normalized.domain
示例#26
0
def create_contingencies(X, callback=None):
    """
    Count value combinations of neighbouring variables.

    `X` is discretized with `EqualFreq(n=10)` and `get_bin_centers` supplies
    the per-variable value mapping. For an `SqlTable` the counting is
    delegated to `create_sql_contingency` and `callback(step, total)` is
    called after each step. Otherwise rows containing NaN are skipped and,
    for every variable, the mapped values in the window of one neighbour on
    each side are counted; the result is a list of `(keys, counts)` array
    pairs.
    """
    window_size = 1
    dim = len(X.domain)

    X_ = Discretize(method=EqualFreq(n=10))(X)
    m = get_bin_centers(X_)

    from Orange.data.sql.table import SqlTable

    if not isinstance(X, SqlTable):
        n_vars = len(X_.domain)
        counters = [defaultdict(float) for _ in range(n_vars)]
        for r in X_:
            if any(np.isnan(r)):
                continue
            row = tuple(m[vi].get(v) for vi, v in enumerate(r))
            for l in range(n_vars):
                start = l - window_size
                if start < 0:
                    start = None
                stop = l + window_size + 1
                if stop > dim:
                    stop = None
                counters[l][row[slice(start, stop)]] += 1
        pairs = [zip(*counter.items()) for counter in counters]
        conts = [(np.array(c), np.array(cw)) for c, cw in pairs]
    else:
        conts = []
        al = len(X.domain)
        if al > 1:
            conts.append(create_sql_contingency(X_, [0, 1], m))
            if callback:
                callback(1, al)
            for a1, a2, a3 in zip(range(al), range(1, al), range(2, al)):
                conts.append(create_sql_contingency(X_, [a1, a2, a3], m))
                if callback:
                    callback(a3, al)
            if al > 2:
                conts.append(create_sql_contingency(X_, [al - 2, al - 1], m))
                if callback:
                    callback(al, al)

    return conts
示例#27
0
class NaiveBayesLearner(Learner):
    """
    Learner for a naive Bayes classifier.

    Only discrete attributes are supported; the default preprocessors
    discretize continuous attributes.

    Parameters
    ----------
    preprocessors : list, optional (default="[Orange.preprocess.Discretize]")
        An ordered list of preprocessors applied to data before training
        or testing.
    """
    preprocessors = [RemoveNaNColumns(), Discretize()]
    name = 'naive bayes'

    def fit_storage(self, table):
        """
        Build a `NaiveBayesModel` from Laplace-smoothed probabilities.

        Raises `TypeError` when `table` is not a `Storage` instance,
        `NotImplementedError` when any domain variable is not discrete and
        `ValueError` when no class value occurs in the data.
        """
        if not isinstance(table, Storage):
            raise TypeError("Data is not a subclass of Orange.data.Storage.")
        if any(not var.is_discrete for var in table.domain.variables):
            raise NotImplementedError("Only categorical variables are "
                                      "supported.")

        contingencies = contingency.get_contingencies(table)
        class_contingency = contingency.get_contingency(
            table, table.domain.class_var)
        class_freq = np.array(np.diag(class_contingency))
        nclss = (class_freq != 0).sum()
        if not nclss:
            raise ValueError("Data has no defined target values.")

        # Laplacian smoothing considers only classes that appear in the data,
        # in part to avoid cases where the probabilities are affected by empty
        # (or completely spurious) classes that appear because of Orange's
        # reuse of variables. See GH-2943.
        # Classes that do not occur keep a mock non-zero probability while
        # log_cont_prob is computed, which prevents division by zero; their
        # probability is set to zero only afterwards.
        class_prob = (class_freq + 1) / (np.sum(class_freq) + nclss)
        log_cont_prob = []
        for c in contingencies:
            counts = np.array(c)
            column_totals = np.sum(counts, axis=0)[None, :]
            smoothed = (counts + 1) / (column_totals + nclss)
            log_cont_prob.append(np.log(smoothed / class_prob[:, None]))
        class_prob[class_freq == 0] = 0
        return NaiveBayesModel(log_cont_prob, class_prob, table.domain)
示例#28
0
class TestCA(unittest.TestCase):
    """Tests of the classification accuracy (CA) score."""

    def test_init(self):
        """`CA(results)` returns one accuracy per method."""
        res = Results(nmethods=2, nrows=100)
        res.actual[:50] = 0
        res.actual[50:] = 1
        res.predicted = np.vstack((res.actual, res.actual))
        np.testing.assert_almost_equal(CA(res), [1, 1])

        # one wrong prediction out of 100 for the first method
        res.predicted[0][0] = 1
        np.testing.assert_almost_equal(CA(res), [0.99, 1])

        # every prediction of the second method inverted
        res.predicted[1] = 1 - res.predicted[1]
        np.testing.assert_almost_equal(CA(res), [0.99, 0])

    def test_call(self):
        """A `CA()` instance called on results gives the same accuracies."""
        res = Results(nmethods=2, nrows=100)
        res.actual[:50] = 0
        res.actual[50:] = 1
        res.predicted = np.vstack((res.actual, res.actual))
        ca = CA()
        np.testing.assert_almost_equal(ca(res), [1, 1])

        # one wrong prediction out of 100 for the first method
        res.predicted[0][0] = 1
        np.testing.assert_almost_equal(ca(res), [0.99, 1])

        # every prediction of the second method inverted
        res.predicted[1] = 1 - res.predicted[1]
        np.testing.assert_almost_equal(ca(res), [0.99, 0])

    def test_bayes(self):
        """CA of naive Bayes is 1 when the class copies a feature and lies
        in [0.75, 1) after the last 20 labels are inverted."""
        x = np.random.randint(2, size=(100, 5))
        # this line was indented by 7 spaces, which made the whole class
        # fail to compile
        col = np.random.randint(5)
        y = x[:, col].copy().reshape(100, 1)
        t = Table(x, y)
        t = Discretize(
            method=discretize.EqualWidth(n=3))(t)
        nb = NaiveBayesLearner()
        res = TestOnTrainingData(t, [nb])
        np.testing.assert_almost_equal(CA(res), [1])

        t.Y[-20:] = 1 - t.Y[-20:]
        res = TestOnTrainingData(t, [nb])
        self.assertGreaterEqual(CA(res)[0], 0.75)
        self.assertLess(CA(res)[0], 1)
示例#29
0
    def _setup(self):
        """
        Rebuild the plot for the currently selected variable and group.

        The plots and legend are cleared, `self.var` / `self.cvar` are
        resolved from the models, and the probability combo is refilled when
        a group variable is chosen. With `self.disc_cont` set, the data is
        reduced to the chosen variables and discretized with `EqualWidth`.
        Finally a contingency (with a group variable) or a distribution is
        computed and displayed.
        """
        self.plot.clear()
        self.plot_prob.clear()
        self._legend.clear()
        self._legend.hide()

        selected = self.variable_idx
        self.var = None
        self.cvar = None
        if selected >= 0:
            self.var = self.varmodel[selected]
        if self.groupvar_idx > 0:
            self.cvar = self.groupvarmodel[self.groupvar_idx]
            prob_combo = self.controls.show_prob
            prob_combo.clear()
            prob_combo.addItem("(None)")
            prob_combo.addItems(self.cvar.values)
            prob_combo.addItem("(All)")
            # keep the stored index within the items just added
            last_index = len(self.cvar.values) + 1
            self.show_prob = min(max(self.show_prob, 0), last_index)
        data = self.data
        self._setup_smoothing()
        if self.var is None:
            return
        if self.disc_cont:
            variables = [self.var]
            if self.cvar:
                variables.append(self.cvar)
            data = Orange.data.Table(Orange.data.Domain(variables), data)
            binning = EqualWidth(n=self.bins[self.smoothing_index])
            data = Discretize(method=binning, remove_const=False)(data)
            self.var = data.domain[0]
        self.set_left_axis_name()
        self.enable_disable_rel_freq()
        self.controls.cumulative_distr.setDisabled(not self.var.is_continuous)
        if not self.cvar:
            self.distributions = \
                distribution.get_distribution(data, self.var)
            self.display_distribution()
        else:
            self.contingencies = \
                contingency.get_contingency(data, self.var, self.cvar)
            self.display_contingency()
        self.plot.autoRange()
示例#30
0
    def test_discrete_features(self):
        """Discretized data shows a data error and a single-row feature model
        unless a continuizing preprocessor is connected."""
        widget = self.widget
        feature_model = widget.controls._feature.model()

        discretized_housing = Discretize()(self.housing)
        self.send_signal(widget.Inputs.data, discretized_housing)
        self.assertEqual(feature_model.rowCount(), 1)
        self.assertTrue(widget.Error.data_error.is_shown())

        # a continuizer on the preprocessor input clears the error
        self.send_signal(widget.Inputs.preprocessor, Continuize())
        self.assertGreater(feature_model.rowCount(), 1)
        self.assertFalse(widget.Error.data_error.is_shown())

        # removing the preprocessor brings the error back
        self.send_signal(widget.Inputs.preprocessor, None)
        self.assertEqual(feature_model.rowCount(), 1)
        self.assertTrue(widget.Error.data_error.is_shown())

        # removing the data clears the error
        self.send_signal(widget.Inputs.data, None)
        self.assertEqual(feature_model.rowCount(), 1)
        self.assertFalse(widget.Error.data_error.is_shown())
示例#31
0
 def setUp(self):
     """Load the iris and adult tables and create an equal-frequency
     discretizer with three intervals."""
     iris, adult = Table("iris"), Table("adult")
     self.iris = iris
     self.adult = adult
     equal_freq = EqualFreq(n=3)
     self.discretizer = Discretize(equal_freq)