Exemplo n.º 1
0
    def __get_pivot_tab_domain(self, val_var, X, X_h, X_v, X_t, agg_funs):
        """Create the four domains used by the pivot table.

        Args:
            val_var: Value variable. When it is falsy or reports
                ``is_continuous``, continuous column variables are created;
                otherwise discrete ones are built from the data.
            X, X_h: Arrays indexed as ``[:, column]``. In the discrete case
                their columns from index 2 onwards are recoded through
                ``map_values`` (written back via ``_X[:, index]``).
            X_v, X_t: Arrays whose column 0 is recoded the same way in the
                discrete case to obtain the values of the "Total" variables.
            agg_funs: Iterable of aggregations; ``str()`` of each item becomes
                a value of the 'Aggregate' variable.

        Returns:
            A 4-tuple of ``Domain`` objects built, in order, from:
            row variable + aggregate variable + first column list;
            "Total"-valued row variable + aggregate variable + second column
            list; the third column list; the fourth column list.

        Side effects:
            Names that had to be changed to become unique are appended to
            ``self.renamed``; ``self._row_var`` (and ``self._col_var`` when it
            is the same object) may be rebound to a renamed copy.
        """
        def map_values(index, _X):
            # Replace each distinct non-"nan" entry of column `index` by its
            # position among the sorted unique entries; return those entries.
            levels = np.unique(_X[:, index])
            levels = np.delete(levels, np.where(levels == "nan")[0])
            for code, level in enumerate(levels):
                _X[:, index][_X[:, index] == level] = code
            return levels

        col_labels = np.array(self._col_var.values)[
            self._col_var_groups.astype(int)]

        if val_var and not val_var.is_continuous:
            # Discrete value variable: one variable per data column, with the
            # values found in that column. Data columns start at index 2.
            attrs = [
                [DiscreteVariable(f"{label}", map_values(col, data))
                 for col, label in enumerate(col_labels, 2)]
                for data in (X, X_h)
            ]
            attrs += [
                [DiscreteVariable("Total", map_values(0, data))]
                for data in (X_v, X_t)
            ]
        else:
            value_vars = [ContinuousVariable(f"{label}", 1)
                          for label in col_labels]
            total_vars = [ContinuousVariable("Total", 1)]
            # Slots 0/1 share one list object and so do slots 2/3; a write
            # through one slot is therefore visible through its twin.
            attrs = [value_vars, value_vars, total_vars, total_vars]

        row_var_h = DiscreteVariable(self._row_var.name, values=["Total"])
        aggr_attr = DiscreteVariable('Aggregate', list(map(str, agg_funs)))

        same_row_col = self._col_var is self._row_var

        # Positions 0 and 1 are the row and aggregate variables; positions
        # 2.. correspond to the entries of attrs[0].
        leading = [self._row_var, aggr_attr]
        candidate_names = [v.name for v in chain(leading, attrs[0])]
        unique_names = get_unique_names_duplicates(candidate_names)

        for idx, (var, new_name) in enumerate(
                zip(chain(leading, attrs[0]), unique_names)):
            if var.name == new_name:
                continue
            if idx >= 2:
                pos = idx - 2
                self.renamed.append(var.name)
                attrs[0][pos] = var.copy(name=new_name)
                attrs[1][pos] = var.copy(name=new_name)
            elif idx == 1:
                self.renamed.append(aggr_attr.name)
                aggr_attr = aggr_attr.copy(name=new_name)
            else:
                self.renamed.append(self._row_var.name)
                self._row_var = self._row_var.copy(name=new_name)
                if same_row_col:
                    # Keep the column variable pointing at the renamed copy.
                    self._col_var = self._row_var
                row_var_h = row_var_h.copy(name=new_name)

        if same_row_col:
            # NOTE(review): the results of both make() calls are discarded;
            # presumably they are invoked for an effect not visible here —
            # confirm.
            column_names = tuple(v.name for v in attrs[0])
            self._row_var.make(self._row_var.name, values=column_names)
            total_names = tuple(v.name for v in attrs[2])
            row_var_h.make(row_var_h.name, total_names)

        main_domain = Domain([self._row_var, aggr_attr] + attrs[0])
        h_domain = Domain([row_var_h, aggr_attr] + attrs[1])
        v_domain = Domain(attrs[2])
        t_domain = Domain(attrs[3])
        return main_domain, h_domain, v_domain, t_domain
Exemplo n.º 2
0
    def test_copy_checks_len_values(self):
        # copy() may replace `values` only with a sequence of the same length.
        original = DiscreteVariable("gender", values=("F", "M"))
        self.assertEqual(original.values, ("F", "M"))

        # Too many values, too few values and no values at all must raise.
        for wrong_values in (("F", "M", "N"), ("F", ), ()):
            with self.assertRaises(ValueError):
                original.copy(values=wrong_values)

        # Omitting `values` keeps the original ones ...
        duplicate = original.copy()
        self.assertEqual(duplicate.values, ("F", "M"))

        # ... and so does passing None explicitly.
        duplicate = original.copy(values=None)
        self.assertEqual(duplicate.values, ("F", "M"))

        # A replacement of equal length is accepted.
        duplicate = original.copy(values=("W", "M"))
        self.assertEqual(duplicate.values, ("W", "M"))