Пример #1
0
    def test_as_discrete(self):
        """
        Apply the `AsCategorical` reinterpretation to every variable of
        `self.data`, transform the table into the resulting domain, and
        compare `X` and the variables' value lists with the expected ones.
        """
        source = self.data
        as_categorical = AsCategorical()
        tdomain = Domain([
            apply_reinterpret(
                var, as_categorical, table_column_data(source, var))
            for var in source.domain.variables
        ])
        ttable = source.transform(tdomain)

        expected_x = np.array(
            [
                [0, 2, 2, 1],
                [1, 1, 3, 2],
                [2, 0, 1, 3],
                [1, 0, 0, 0],
            ],
            dtype=float,
        )
        assert_array_equal(ttable.X, expected_x)

        # Expected value lists, checked in the order A, B, C, D.
        expected_values = {
            "A": ["a", "b", "c"],
            "B": ["0", "1", "2"],
            "C": ["0.0", "0.2", "0.25", "1.25"],
            "D": [
                "1970-01-01 00:00:00", "1970-01-01 00:03:00",
                "1970-01-01 00:06:00", "1970-01-01 00:12:00",
            ],
        }
        for name, values in expected_values.items():
            self.assertEqual(tdomain[name].values, values)
Пример #2
0
 def test_reinterpret_string(self):
     """
     Reinterpret each meta variable of `self.data_str` with `AsContinuous`,
     `AsCategorical`, `AsTime` and `AsString` in turn, and compare the
     transformed metas with the expected values.

     Every result is renamed to `<original name>_<transform index>`;
     results of `AsTime` additionally get a `StrpTime` transform applied.
     """
     source = self.data_str
     reinterpreted = []
     for meta in source.domain.metas:
         # A fresh set of transform instances is built for every variable.
         transforms = [
             AsContinuous(), AsCategorical(), AsTime(), AsString()
         ]
         for index, transform in enumerate(transforms):
             column = table_column_data(source, meta)
             new_var = apply_reinterpret(meta, transform, column)
             new_var = new_var.renamed(f"{meta.name}_{index}")
             if isinstance(transform, AsTime):
                 new_var = apply_transform_var(
                     new_var,
                     [StrpTime("Detect automatically", None, 1, 1)],
                 )
             reinterpreted.append(new_var)

     ttable = source.transform(Domain([], metas=reinterpreted))
     nan = np.nan
     expected = np.array(
         [
             [0.1, 0.0, nan, "0.1", 2010.0, 0.0, 1262304000.0, "2010"],
             [1.0, 1.0, nan, "1.0", 2020.0, 1.0, 1577836800.0, "2020"],
         ],
         dtype=object,
     )
     assert_array_nanequal(ttable.metas, expected)
Пример #3
0
    def _make_parts(self, data, group_var=None):
        """
        Build the initial `Parts` for `data`.

        When `group_var` is given (it must be discrete), one `RowPart` is
        created per value of `group_var`, holding the indices of the rows
        with that value; an extra "N/A" part collects the rows whose entry
        is masked in the column data, if there are any. Without `group_var`
        a single untitled `RowPart` spans all rows.

        The columns always form a single untitled `ColumnPart` over all
        attributes. The returned `Parts` carries the NaN-ignoring minimum
        and maximum of `data.X` as its `span`.
        """
        if group_var is None:
            row_groups = [
                RowPart(title=None, indices=range(0, len(data)),
                        cluster=None, cluster_ordered=None)
            ]
        else:
            assert group_var.is_discrete
            column = table_column_data(data, group_var)
            # Values are matched by their index in `group_var.values`.
            row_groups = [
                RowPart(title=value,
                        indices=np.flatnonzero(column == index),
                        cluster=None, cluster_ordered=None)
                for index, value in enumerate(group_var.values)
            ]
            if np.any(column.mask):
                missing_rows = np.flatnonzero(column.mask)
                row_groups.append(
                    RowPart(title="N/A", indices=missing_rows,
                            cluster=None, cluster_ordered=None))

        all_columns = ColumnPart(
            title=None,
            indices=range(0, len(data.domain.attributes)),
            domain=data.domain,
            cluster=None,
            cluster_ordered=None,
        )

        span = (np.nanmin(data.X), np.nanmax(data.X))
        return Parts(row_groups, [all_columns], span=span)
Пример #4
0
 def test_reinterpret_string(self):
     """
     Reinterpret each meta variable of `self.data_str` with `AsContinuous`,
     `AsCategorical`, `AsTime` and `AsString` in turn, and compare the
     transformed metas with the expected values.
     """
     source = self.data_str
     reinterpreted = []
     for meta in source.domain.metas:
         # A fresh set of transform instances is built for every variable.
         transforms = (
             AsContinuous(), AsCategorical(), AsTime(), AsString()
         )
         reinterpreted.extend(
             apply_reinterpret(
                 meta, transform, table_column_data(source, meta))
             for transform in transforms
         )

     ttable = source.transform(Domain([], metas=reinterpreted))
     nan = np.nan
     expected = np.array(
         [
             [0.1, 0.0, nan, "0.1", 2010.0, 0.0, 1262304000.0, "2010"],
             [1.0, 1.0, nan, "1.0", 2020.0, 1.0, 1577836800.0, "2020"],
         ],
         dtype=object,
     )
     assert_array_nanequal(ttable.metas, expected)
Пример #5
0
    def test_as_continuous(self):
        """
        Apply the `AsContinuous` reinterpretation to every variable of
        `self.data`, transform the table into the resulting domain, and
        compare `X` with the expected values.
        """
        source = self.data
        as_continuous = AsContinuous()
        reinterpreted = [
            apply_reinterpret(
                var, as_continuous, table_column_data(source, var))
            for var in source.domain.variables
        ]
        ttable = source.transform(Domain(reinterpreted))

        nan = np.nan
        expected_x = np.array(
            [
                [nan, 2, 0.25, 180],
                [nan, 1, 1.25, 360],
                [nan, 0, 0.20, 720],
                [nan, 0, 0.00, 0],
            ],
            dtype=float,
        )
        assert_array_equal(ttable.X, expected_x)
Пример #6
0
    def test_as_string(self):
        """
        Apply the `AsString` reinterpretation to every variable of
        `self.data`, place the results in the third positional argument of
        `Domain`, and compare the transformed table's `metas` with the
        expected strings.
        """
        source = self.data
        as_string = AsString()
        reinterpreted = [
            apply_reinterpret(var, as_string, table_column_data(source, var))
            for var in source.domain.variables
        ]
        ttable = source.transform(Domain([], [], reinterpreted))

        expected_metas = np.array(
            [
                ["a", "2", "0.25", "00:03:00"],
                ["b", "1", "1.25", "00:06:00"],
                ["c", "0", "0.2", "00:12:00"],
                ["b", "0", "0.0", "00:00:00"],
            ],
            dtype=object,
        )
        assert_array_equal(ttable.metas, expected_metas)
Пример #7
0
    def test_as_time(self):
        """
        Reinterpret string and discrete meta variables with `AsTime`, apply
        a `StrpTime` transform built for each format, and compare the
        transformed metas with values from `TimeVariable.parse_exact_iso`.
        """
        # Per the original author's note: this test only covers the kinds of
        # variables being reinterpreted; correctness of the time formats
        # themselves is already tested in the TimeVariable module.
        parse_iso = TimeVariable("_").parse_exact_iso
        times = (
            ["07.02.2022", "18.04.2021"],  # date only
            ["07.02.2022 01:02:03", "18.04.2021 01:02:03"],  # datetime
            ["010203", "010203"],  # time
            ["02-07", "04-18"],
        )
        formats = ["25.11.2021", "25.11.2021 00:00:00", "000000", "11-25"]
        expected = [
            [parse_iso("2022-02-07"), parse_iso("2021-04-18")],
            [parse_iso("2022-02-07 01:02:03"),
             parse_iso("2021-04-18 01:02:03")],
            [parse_iso("01:02:03"), parse_iso("01:02:03")],
            [parse_iso("1900-02-07"), parse_iso("1900-04-18")],
        ]

        # The same columns are used twice: once as string variables and
        # once as discrete variables whose values are the column's entries.
        string_vars = [StringVariable(f"s{i}") for i in range(len(times))]
        discrete_vars = [
            DiscreteVariable(f"d{i}", values=column)
            for i, column in enumerate(times)
        ]
        domain = Domain([], metas=string_vars + discrete_vars)

        string_columns = list(times)
        index_columns = [list(range(len(column))) for column in times]
        table = Table(
            domain,
            np.empty((len(times[0]), 0)),
            metas=np.array(string_columns + index_columns).transpose(),
        )

        as_time = AsTime()
        reinterpreted = []
        # `formats` is repeated so it pairs with both halves of the metas.
        for var, fmt in zip(domain.metas, formats * 2):
            strp = StrpTime(fmt, *TimeVariable.ADDITIONAL_FORMATS[fmt])
            time_var = apply_reinterpret(
                var, as_time, table_column_data(table, var))
            reinterpreted.append(apply_transform_var(time_var, [strp]))

        ttable = table.transform(Domain([], metas=reinterpreted))
        expected_metas = np.array(expected * 2, dtype=float).transpose()
        assert_array_equal(ttable.metas, expected_metas)