Пример #1
0
def grid_bin(data, xvar, yvar, xbins, ybins, zvar=None):
    """Bin `data` on the grid given by `xbins` x `ybins`.

    `xvar` and `yvar` are discretized with the interior bin edges
    (`xbins[1:-1]`, `ybins[1:-1]`); the outermost edges are used only to
    restrict `data` to the grid's range.

    Args:
        data: Table to bin (presumably an Orange table; it must support
            being passed to Orange filters and `from_table`).
        xvar: Variable binned along the contingency rows.
        yvar: Variable binned along the contingency columns.
        xbins: Bin edges for `xvar`, outermost edges included.
        ybins: Bin edges for `yvar`, outermost edges included.
        zvar: Optional third variable. When it is discrete, one contingency
            is computed per value and the results are stacked with
            `np.dstack`.

    Returns:
        `Tree(xbins, ybins, contingencies, None)`.
    """
    x_disc = Discretizer.create_discretized_var(xvar, xbins[1:-1])
    y_disc = Discretizer.create_discretized_var(yvar, ybins[1:-1])

    x_min, x_max = xbins[0], xbins[-1]
    y_min, y_max = ybins[0], ybins[-1]

    query_vars = [x_disc, y_disc]
    if zvar is not None:
        query_vars.append(zvar)
    querydomain = Orange.data.Domain(query_vars)

    def interval_filter(var, low, high):
        # Keep rows whose `var` lies between `low` and `high`.
        return Orange.data.filter.Values([
            Orange.data.filter.FilterContinuous(
                var,
                max=high,
                min=low,
                oper=Orange.data.filter.FilterContinuous.Between)
        ])

    def value_filter(var, val):
        # Keep rows whose discrete `var` equals `val`.
        return Orange.data.filter.Values(
            [Orange.data.filter.FilterDiscrete(var, [val])])

    def filters_join(filters):
        # Merge the conditions of several Values filters into a single one.
        return Orange.data.filter.Values(
            [cond for flt in filters for cond in flt.conditions])

    if np.isinf([x_min, x_max, y_min, y_max]).all():
        # Every bound is infinite: no need to filter the data.
        subset = data
    else:
        range_filter = filters_join([
            interval_filter(xvar, x_min, x_max),
            interval_filter(yvar, y_min, y_max),
        ])
        subset = range_filter(data)

    # The projection onto the query domain does not depend on the z value,
    # so it is computed once and shared by every per-value filter below.
    binned = subset.from_table(querydomain, subset)

    if zvar is not None and zvar.is_discrete:
        contingencies = np.dstack([
            contingency.get_contingency(
                value_filter(zvar, val)(binned),
                col_variable=y_disc,
                row_variable=x_disc)
            for val in zvar.values
        ])
    else:
        contingencies = contingency.get_contingency(
            binned, col_variable=y_disc, row_variable=x_disc)

    return Tree(xbins, ybins, np.asarray(contingencies), None)
Пример #2
0
def grid_bin(data, xvar, yvar, xbins, ybins, zvar=None):
    """Bin `data` on the grid given by `xbins` x `ybins`.

    `xvar` and `yvar` are discretized with the interior bin edges
    (`xbins[1:-1]`, `ybins[1:-1]`); the outermost edges are used only to
    restrict `data` to the grid's range.

    Args:
        data: Table to bin (presumably an Orange table; it must support
            being passed to Orange filters and `from_table`).
        xvar: Variable binned along the contingency rows.
        yvar: Variable binned along the contingency columns.
        xbins: Bin edges for `xvar`, outermost edges included.
        ybins: Bin edges for `yvar`, outermost edges included.
        zvar: Optional third variable. May be omitted (`None`). When it is
            discrete, one contingency is computed per value and the results
            are stacked with `np.dstack`.

    Returns:
        `Tree(xbins, ybins, contingencies, None)`.
    """
    x_disc = Discretizer.create_discretized_var(xvar, xbins[1:-1])
    y_disc = Discretizer.create_discretized_var(yvar, ybins[1:-1])

    x_min, x_max = xbins[0], xbins[-1]
    y_min, y_max = ybins[0], ybins[-1]

    querydomain = [x_disc, y_disc]
    if zvar is not None:
        querydomain = querydomain + [zvar]

    querydomain = Orange.data.Domain(querydomain)

    def interval_filter(var, low, high):
        # Keep rows whose `var` lies between `low` and `high`.
        return Orange.data.filter.Values(
            [Orange.data.filter.FilterContinuous(
                 var, max=high, min=low,
                 oper=Orange.data.filter.FilterContinuous.Between)]
        )

    def value_filter(var, val):
        # Keep rows whose discrete `var` equals `val`.
        return Orange.data.filter.Values(
            [Orange.data.filter.FilterDiscrete(var, [val])]
        )

    def filters_join(filters):
        # Merge the conditions of several Values filters into a single one.
        return Orange.data.filter.Values(
            reduce(list.__iadd__, (f.conditions for f in filters), [])
        )

    inf_bounds = np.isinf([x_min, x_max, y_min, y_max])
    if not all(inf_bounds):
        # At least one bound is finite: restrict the data to the grid range.
        range_filters = [interval_filter(xvar, x_min, x_max),
                         interval_filter(yvar, y_min, y_max)]
        range_filter = filters_join(range_filters)
        subset = range_filter(data)
    else:
        # Every bound is infinite: no need to filter the data.
        subset = data

    # The projection onto the query domain does not depend on the z value,
    # so it is computed once and shared by every per-value filter below.
    projected = subset.from_table(querydomain, subset)

    # `zvar` defaults to None, so it must be checked before reading
    # `is_discrete`; otherwise the default call raises AttributeError.
    if zvar is not None and zvar.is_discrete:
        filters = [value_filter(zvar, val) for val in zvar.values]
        contingencies = [
            contingency.get_contingency(
                filter_(projected),
                col_variable=y_disc, row_variable=x_disc
            )
            for filter_ in filters
        ]
        contingencies = np.dstack(contingencies)
    else:
        contingencies = contingency.get_contingency(
            projected,
            col_variable=y_disc, row_variable=x_disc
        )

    contingencies = np.asarray(contingencies)
    return Tree(xbins, ybins, contingencies, None)
Пример #3
0
    def test_equality(self):
        """Check `Discretizer` equality and hashing.

        Discretizers are expected to compare (and hash) equal when built
        from equal variables with the same points in the same order, and to
        differ when either the variable or the order of the points differs.
        """
        v1 = ContinuousVariable("x")
        v2 = ContinuousVariable("x", number_of_decimals=42)
        v3 = ContinuousVariable("y")
        # Precondition for the checks below: same-named variables compare
        # equal despite different `number_of_decimals`. Uses a unittest
        # assertion so the check is not stripped when running with -O.
        self.assertEqual(v1, v2)

        t1 = Discretizer(v1, [0, 2, 1])
        t1a = Discretizer(v2, [0, 2, 1])
        t2 = Discretizer(v3, [0, 2, 1])
        self.assertEqual(t1, t1)
        self.assertEqual(t1, t1a)
        self.assertNotEqual(t1, t2)

        self.assertEqual(hash(t1), hash(t1a))
        self.assertNotEqual(hash(t1), hash(t2))

        # Same values, different order of points: must not be equal.
        t1_reordered = Discretizer(v2, [1, 2, 0])
        self.assertNotEqual(t1, t1_reordered)
        self.assertNotEqual(hash(t1), hash(t1_reordered))