Exemplo n.º 1
0
    def test_convert(self):
        contents = (b'I, J,  K\n' b' , A,   \n' b'B,  ,  1\n' b'?, ., NA')

        class dialect(csv.excel):
            skipinitialspace = True

        opts = owcsvimport.Options(encoding="ascii",
                                   dialect=dialect(),
                                   columntypes=[
                                       (range(0, 1), ColumnType.Text),
                                       (range(1, 2), ColumnType.Categorical),
                                       (range(2, 3), ColumnType.Text),
                                   ],
                                   rowspec=[(range(0, 1), RowSpec.Header)])
        df = owcsvimport.load_csv(io.BytesIO(contents), opts)
        tb = pandas_to_table(df)

        assert_array_equal(tb.metas[:, 0], ["", "B", "?"])
        assert_array_equal(tb.metas[:, 1], ["", "1", "NA"])
        assert_array_equal(tb.X[:, 0], [0.0, np.nan, np.nan])

        opts = owcsvimport.Options(encoding="ascii",
                                   dialect=dialect(),
                                   columntypes=[
                                       (range(0, 1), ColumnType.Categorical),
                                       (range(1, 2), ColumnType.Categorical),
                                       (range(2, 3), ColumnType.Numeric),
                                   ],
                                   rowspec=[(range(0, 1), RowSpec.Header)])
        df = owcsvimport.load_csv(io.BytesIO(contents), opts)
        tb = pandas_to_table(df)

        assert_array_equal(tb.X[:, 0], [np.nan, 0, np.nan])
        assert_array_equal(tb.X[:, 1], [0, np.nan, np.nan])
        assert_array_equal(tb.X[:, 2], [np.nan, 1, np.nan])
 def test_load_csv(self):
     contents = (
         b'1/1/1990,1.0,[,one,\n'
         b'1/1/1990,2.0,],two,\n'
         b'1/1/1990,3.0,{,three,'
     )
     opts = owcsvimport.Options(
         encoding="ascii",
         dialect=csv.excel(),
         columntypes=[
             (range(0, 1), ColumnType.Time),
             (range(1, 2), ColumnType.Numeric),
             (range(2, 3), ColumnType.Text),
             (range(3, 4), ColumnType.Categorical),
             (range(4, 5), ColumnType.Auto),
         ],
         rowspec=[]
     )
     df = owcsvimport.load_csv(io.BytesIO(contents), opts)
     self.assertEqual(df.shape, (3, 5))
     self.assertSequenceEqual(
         list(df.dtypes),
         [np.dtype("M8[ns]"), np.dtype(float), np.dtype(object),
          "category", np.dtype(float)],
     )
     opts = owcsvimport.Options(
         encoding="ascii",
         dialect=csv.excel(),
         columntypes=[
             (range(0, 1), ColumnType.Skip),
             (range(1, 2), ColumnType.Numeric),
             (range(2, 3), ColumnType.Skip),
             (range(3, 4), ColumnType.Categorical),
             (range(4, 5), ColumnType.Skip),
         ],
         rowspec=[
             (range(1, 2), RowSpec.Skipped)
         ]
     )
     df = owcsvimport.load_csv(io.BytesIO(contents), opts)
     self.assertEqual(df.shape, (2, 2))
     self.assertSequenceEqual(
         list(df.dtypes), [np.dtype(float), "category"]
     )
     self.assertSequenceEqual(
         list(df.iloc[:, 0]), [1.0, 3.0]
     )
     self.assertSequenceEqual(
         list(df.iloc[:, 1]), ["one", "three"]
     )
 def test_dialog():
     dirname = os.path.dirname(__file__)
     path = os.path.join(dirname, "grep_file.txt")
     d = owcsvimport.CSVImportDialog()
     d.setPath(path)
     opts = owcsvimport.Options(
         encoding="utf-8",
         dialect=owcsvimport.textimport.Dialect(
             " ", "\"", "\\", True, True
         ),
         columntypes=[
             (range(0, 2), ColumnType.Numeric),
             (range(2, 3), ColumnType.Categorical)
         ],
         rowspec=[
             (range(0, 4), RowSpec.Skipped),
             (range(4, 5), RowSpec.Header),
             (range(8, 13), RowSpec.Skipped),
         ]
     )
     d.setOptions(opts)
     d.restoreDefaults()
     opts1 = d.options()
     d.reset()
     opts1 = d.options()
 def test_discrete_values_sort(self):
     """ Values in the discrete variable should be naturally sorted """
     dirname = os.path.dirname(__file__)
     path = os.path.join(dirname, "data-csv-types.tab")
     options = owcsvimport.Options(
         encoding="ascii", dialect=csv.excel_tab(),
         columntypes=[
             (range(0, 1), ColumnType.Auto),
             (range(1, 2), ColumnType.Categorical),
             (range(2, 5), ColumnType.Auto)
         ]
     )
     widget = self.create_widget(
         owcsvimport.OWCSVFileImport,
         stored_settings={
             "_session_items": [
                 (path, options.as_dict())
             ],
             "__version__": 2  # guessing works for versions >= 2
         }
     )
     widget.commit()
     self.wait_until_finished(widget)
     output = self.get_output("Data", widget)
     self.assertTupleEqual(('1', '3', '4', '5', '12'), output.domain.attributes[1].values)
    def test_decimal_format(self):
        class Dialect(csv.excel):
            delimiter = ";"

        contents = b'3,21;3,37\n4,13;1.000,142'
        opts = owcsvimport.Options(
            encoding="ascii",
            dialect=Dialect(),
            decimal_separator=",",
            group_separator=".",
            columntypes=[
                (range(0, 2), ColumnType.Numeric),
            ],
            rowspec=[],
        )
        df = owcsvimport.load_csv(io.BytesIO(contents), opts)
        assert_array_equal(df.values, np.array([[3.21, 3.37], [4.13, 1000.142]]))
Exemplo n.º 6
0
class TestOWCSVFileImport(WidgetTest):
    def setUp(self):
        self._stack = ExitStack().__enter__()
        # patch `_local_settings` to avoid side effects, across tests
        fname = self._stack.enter_context(named_file(""))
        s = QSettings(fname, QSettings.IniFormat)
        self._stack.enter_context(
            mock.patch.object(owcsvimport.OWCSVFileImport, "_local_settings",
                              lambda *a: s))
        self.widget = self.create_widget(owcsvimport.OWCSVFileImport)

    def tearDown(self):
        self.widgets.remove(self.widget)
        self.widget.onDeleteWidget()
        self.widget = None
        self._stack.close()

    def test_basic(self):
        w = self.widget
        w.activate_recent(0)
        w.cancel()

    data_regions_options = owcsvimport.Options(
        encoding="ascii",
        dialect=csv.excel_tab(),
        columntypes=[
            (range(0, 1), ColumnType.Categorical),
            (range(1, 2), ColumnType.Text),
            (range(2, 3), ColumnType.Categorical),
        ],
        rowspec=[
            (range(0, 1), RowSpec.Header),
            (range(1, 3), RowSpec.Skipped),
        ],
    )

    def _check_data_regions(self, table):
        self.assertEqual(len(table), 3)
        self.assertEqual(len(table), 3)
        self.assertTrue(table.domain["id"].is_discrete)
        self.assertTrue(table.domain["continent"].is_discrete)
        self.assertTrue(table.domain["state"].is_string)
        assert_array_equal(table.X, [[0, 1], [1, 1], [2, 0]])
        assert_array_equal(table.metas,
                           np.array([["UK"], ["Russia"], ["Mexico"]], object))

    def test_restore(self):
        dirname = os.path.dirname(__file__)
        path = os.path.join(dirname, "data-regions.tab")

        w = self.create_widget(owcsvimport.OWCSVFileImport,
                               stored_settings={
                                   "_session_items":
                                   [(path, self.data_regions_options.as_dict())
                                    ]
                               })
        item = w.current_item()
        self.assertEqual(item.path(), path)
        self.assertEqual(item.options(), self.data_regions_options)
        out = self.get_output("Data", w)
        self._check_data_regions(out)
        self.assertEqual(out.name, "data-regions")

    def test_restore_from_local(self):
        dirname = os.path.dirname(__file__)
        path = os.path.join(dirname, "data-regions.tab")
        s = owcsvimport.OWCSVFileImport._local_settings()
        s.clear()
        QSettings_writeArray(
            s, "recent",
            [{
                "path": path,
                "options": json.dumps(self.data_regions_options.as_dict())
            }])
        w = self.create_widget(owcsvimport.OWCSVFileImport, )
        item = w.current_item()
        self.assertEqual(item.path(), path)
        self.assertEqual(item.options(), self.data_regions_options)
        self.assertEqual(
            w._session_items,
            [(path, self.data_regions_options.as_dict())],
            "local settings item must be recorded in _session_items when "
            "activated in __init__",
        )
        self._check_data_regions(self.get_output("Data", w))

    def test_summary(self):
        """Check if status bar is updated when data is received"""
        dirname = os.path.dirname(__file__)
        path = os.path.join(dirname, "data-regions.tab")
        widget = self.create_widget(owcsvimport.OWCSVFileImport,
                                    stored_settings={
                                        "_session_items":
                                        [(path,
                                          self.data_regions_options.as_dict())]
                                    })
        output_sum = widget.info.set_output_summary = mock.Mock()
        widget.commit()
        self.wait_until_finished(widget)
        output = self.get_output("Data", widget)
        output_sum.assert_called_with(len(output),
                                      format_summary_details(output))
Exemplo n.º 7
0
class TestOWCSVFileImport(WidgetTest):
    def create_widget(self,
                      cls: Type[W],
                      stored_settings: Optional[dict] = None,
                      reset_default_settings=True,
                      **kwargs) -> W:
        if reset_default_settings:
            self.reset_default_settings(cls)
        widget = cls.__new__(cls,
                             signal_manager=self.signal_manager,
                             stored_settings=stored_settings,
                             **kwargs)
        widget.__init__()

        def delete():
            widget.onDeleteWidget()
            widget.close()
            widget.deleteLater()

        self._stack.callback(delete)
        return widget

    def setUp(self):
        super().setUp()
        self._stack = ExitStack().__enter__()
        # patch `_local_settings` to avoid side effects, across tests
        fname = self._stack.enter_context(named_file(""))
        s = QSettings(fname, QSettings.IniFormat)
        self._stack.enter_context(
            mock.patch.object(owcsvimport.OWCSVFileImport, "_local_settings",
                              lambda *a: s))
        self.widget = self.create_widget(owcsvimport.OWCSVFileImport)

    def tearDown(self):
        del self.widget
        self._stack.close()
        super().tearDown()

    def test_basic(self):
        w = self.widget
        w.activate_recent(0)
        w.cancel()

    data_regions_options = owcsvimport.Options(
        encoding="ascii",
        dialect=csv.excel_tab(),
        columntypes=[
            (range(0, 1), ColumnType.Categorical),
            (range(1, 2), ColumnType.Text),
            (range(2, 3), ColumnType.Categorical),
        ],
        rowspec=[
            (range(0, 1), RowSpec.Header),
            (range(1, 3), RowSpec.Skipped),
        ],
    )
    data_regions_path = os.path.join(os.path.dirname(__file__),
                                     "data-regions.tab")

    def _check_data_regions(self, table):
        self.assertEqual(len(table), 3)
        self.assertEqual(len(table), 3)
        self.assertTrue(table.domain["id"].is_discrete)
        self.assertTrue(table.domain["continent"].is_discrete)
        self.assertTrue(table.domain["state"].is_string)
        assert_array_equal(table.X, [[0, 1], [1, 1], [2, 0]])
        assert_array_equal(table.metas,
                           np.array([["UK"], ["Russia"], ["Mexico"]], object))

    def test_restore(self):
        dirname = os.path.dirname(__file__)
        path = os.path.join(dirname, "data-regions.tab")

        w = self.create_widget(owcsvimport.OWCSVFileImport,
                               stored_settings={
                                   "_session_items":
                                   [(path, self.data_regions_options.as_dict())
                                    ]
                               })
        item = w.current_item()
        self.assertTrue(samepath(item.path(), path))
        self.assertEqual(item.options(), self.data_regions_options)
        out = self.get_output("Data", w)
        self._check_data_regions(out)
        self.assertEqual(out.name, "data-regions")

    def test_restore_from_local(self):
        dirname = os.path.dirname(__file__)
        path = os.path.join(dirname, "data-regions.tab")
        s = owcsvimport.OWCSVFileImport._local_settings()
        s.clear()
        QSettings_writeArray(
            s, "recent",
            [{
                "path": path,
                "options": json.dumps(self.data_regions_options.as_dict())
            }])
        w = self.create_widget(owcsvimport.OWCSVFileImport, )
        item = w.current_item()
        self.assertIsNone(item)
        simulate.combobox_activate_index(w.recent_combo, 0)
        item = w.current_item()
        self.assertTrue(samepath(item.path(), path))
        self.assertEqual(item.options(), self.data_regions_options)
        data = w.settingsHandler.pack_data(w)
        self.assertEqual(
            data['_session_items_v2'],
            [(PathItem.AbsPath(path).as_dict(),
              self.data_regions_options.as_dict())],
            "local settings item must be recorded in _session_items_v2 when "
            "activated",
        )
        self._check_data_regions(self.get_output("Data", w))

    data_csv_types_options = owcsvimport.Options(encoding="ascii",
                                                 dialect=csv.excel_tab(),
                                                 columntypes=[
                                                     (range(0, 5),
                                                      ColumnType.Auto),
                                                 ])

    def test_type_guessing(self):
        """ Check if correct column type is guessed when column type auto """
        dirname = os.path.dirname(__file__)
        path = os.path.join(dirname, "data-csv-types.tab")
        widget = self.create_widget(
            owcsvimport.OWCSVFileImport,
            stored_settings={
                "_session_items":
                [(path, self.data_csv_types_options.as_dict())],
                "__version__": 2  # guessing works for versions >= 2
            })
        widget.commit()
        self.wait_until_finished(widget)
        output = self.get_output("Data", widget)
        domain = output.domain

        self.assertIsInstance(domain["time"], TimeVariable)
        self.assertIsInstance(domain["discrete1"], DiscreteVariable)
        self.assertIsInstance(domain["discrete2"], DiscreteVariable)
        self.assertIsInstance(domain["numeric1"], ContinuousVariable)
        self.assertIsInstance(domain["numeric2"], ContinuousVariable)
        self.assertIsInstance(domain["string"], StringVariable)

    def test_discrete_values_sort(self):
        """ Values in the discrete variable should be naturally sorted """
        dirname = os.path.dirname(__file__)
        path = os.path.join(dirname, "data-csv-types.tab")
        options = owcsvimport.Options(encoding="ascii",
                                      dialect=csv.excel_tab(),
                                      columntypes=[
                                          (range(0, 1), ColumnType.Auto),
                                          (range(1,
                                                 2), ColumnType.Categorical),
                                          (range(2, 5), ColumnType.Auto)
                                      ])
        widget = self.create_widget(
            owcsvimport.OWCSVFileImport,
            stored_settings={
                "_session_items": [(path, options.as_dict())],
                "__version__": 2  # guessing works for versions >= 2
            })
        widget.commit()
        self.wait_until_finished(widget)
        output = self.get_output("Data", widget)
        self.assertTupleEqual(('1', '3', '4', '5', '12'),
                              output.domain.attributes[1].values)

    def test_backward_compatibility(self):
        """
        Check that widget have old behaviour on workflows with version < 2
        """
        dirname = os.path.dirname(__file__)
        path = os.path.join(dirname, "data-csv-types.tab")
        widget = self.create_widget(
            owcsvimport.OWCSVFileImport,
            stored_settings={
                "_session_items":
                [(path, self.data_csv_types_options.as_dict())],
                "__version__": 1  # guessing works for versions >= 2
            })
        widget.commit()
        self.wait_until_finished(widget)
        output = self.get_output("Data", widget)
        domain = output.domain

        self.assertIsInstance(domain["time"], StringVariable)
        self.assertIsInstance(domain["discrete1"], ContinuousVariable)
        self.assertIsInstance(domain["discrete2"], StringVariable)
        self.assertIsInstance(domain["numeric1"], ContinuousVariable)
        self.assertIsInstance(domain["numeric2"], ContinuousVariable)
        self.assertIsInstance(domain["string"], StringVariable)

    @staticmethod
    @contextmanager
    def _browse_setup(widget: OWCSVFileImport, path: str):
        browse_dialog = widget._browse_dialog
        with mock.patch.object(widget, "_browse_dialog") as r:
            dlg = browse_dialog()
            dlg.setOption(QFileDialog.DontUseNativeDialog)
            dlg.selectFile(path)
            dlg.exec = lambda: QFileDialog.Accepted
            r.return_value = dlg
            with mock.patch.object(owcsvimport.CSVImportDialog, "exec",
                                   lambda _: QFileDialog.Accepted):
                yield

    def test_browse(self):
        widget = self.widget
        path = self.data_regions_path
        with self._browse_setup(widget, path):
            widget.browse()
        cur = widget.current_item()
        self.assertIsNotNone(cur)
        self.assertTrue(samepath(cur.path(), path))

    def test_browse_prefix(self):
        widget = self.widget
        path = self.data_regions_path
        with self._browse_setup(widget, path):
            basedir = os.path.dirname(__file__)
            widget.workflowEnv = lambda: {"basedir": basedir}
            widget.workflowEnvChanged("basedir", basedir, "")
            widget.browse_relative(prefixname="basedir")

        cur = widget.current_item()
        self.assertIsNotNone(cur)
        self.assertTrue(samepath(cur.path(), path))
        self.assertIsInstance(cur.varPath(), PathItem.VarPath)

    def test_browse_prefix_parent(self):
        widget = self.widget
        path = self.data_regions_path

        with self._browse_setup(widget, path):
            basedir = os.path.join(os.path.dirname(__file__), "bs")
            widget.workflowEnv = lambda: {"basedir": basedir}
            widget.workflowEnvChanged("basedir", basedir, "")
            mb = widget._path_must_be_relative_mb = mock.Mock()
            widget.browse_relative(prefixname="basedir")
            mb.assert_called()
        self.assertIsNone(widget.current_item())

    def test_browse_for_missing(self):
        missing = os.path.dirname(__file__) + "/this file does not exist.csv"
        widget = self.create_widget(owcsvimport.OWCSVFileImport,
                                    stored_settings={
                                        "_session_items":
                                        [(missing,
                                          self.data_regions_options.as_dict())]
                                    })
        widget.activate_recent(0)
        dlg = widget.findChild(QFileDialog)
        assert dlg is not None
        # calling selectFile when using native (macOS) dialog does not have
        # an effect - at least not immediately;
        dlg.setOption(QFileDialog.DontUseNativeDialog)
        dlg.selectFile(self.data_regions_path)
        dlg.accept()
        cur = widget.current_item()
        self.assertTrue(samepath(self.data_regions_path, cur.path()))
        self.assertEqual(self.data_regions_options.as_dict(),
                         cur.options().as_dict())

    def test_browse_for_missing_prefixed(self):
        path = self.data_regions_path
        basedir = os.path.dirname(path)
        widget = self.create_widget(
            owcsvimport.OWCSVFileImport,
            stored_settings={
                "__version__":
                3,
                "_session_items_v2":
                [(PathItem.VarPath("basedir",
                                   "this file does not exist.csv").as_dict(),
                  self.data_regions_options.as_dict())]
            },
            env={"basedir": basedir})
        widget.activate_recent(0)
        dlg = widget.findChild(QFileDialog)
        assert dlg is not None
        # calling selectFile when using native (macOS) dialog does not have
        # an effect - at least not immediately;
        dlg.setOption(QFileDialog.DontUseNativeDialog)
        dlg.selectFile(path)
        dlg.accept()
        cur = widget.current_item()
        self.assertTrue(samepath(path, cur.path()))
        self.assertEqual(cur.varPath(),
                         PathItem.VarPath("basedir", "data-regions.tab"))
        self.assertEqual(self.data_regions_options.as_dict(),
                         cur.options().as_dict())

    def test_browse_for_missing_prefixed_parent(self):
        path = self.data_regions_path
        basedir = os.path.join(os.path.dirname(path), "origin1")
        item = (PathItem.VarPath("basedir", "this file does not exist.csv"),
                self.data_regions_options)
        widget = self.create_widget(owcsvimport.OWCSVFileImport,
                                    stored_settings={
                                        "__version__":
                                        3,
                                        "_session_items_v2":
                                        [(item[0].as_dict(), item[1].as_dict())
                                         ]
                                    },
                                    env={"basedir": basedir})
        mb = widget._path_must_be_relative_mb = mock.Mock()
        widget.activate_recent(0)
        dlg = widget.findChild(QFileDialog)
        assert dlg is not None
        # calling selectFile when using native (macOS) dialog does not have
        # an effect - at least not immediately;
        dlg.setOption(QFileDialog.DontUseNativeDialog)
        dlg.selectFile(path)
        dlg.accept()
        mb.assert_called()
        cur = widget.current_item()
        self.assertEqual(item[0], cur.varPath())
        self.assertEqual(item[1].as_dict(), cur.options().as_dict())