Exemplo n.º 1
0
    def __call__(self, e1, e2=None, axis=None, impute=False):
        """
        Compute Mahalanobis distances with the stored inverse covariance.

        :param e1: first data argument; converted with ``_orange_to_numpy``
        :param e2: optional second data argument
        :param axis: if given, it must equal ``self.axis``
        :param impute: if true, NaNs in the computed distances are replaced
            with ``np.nan_to_num``
        :return: the distances wrapped in a ``DistMatrix``
        :raises AssertionError: if ``self.VI`` is ``None`` or ``axis``
            differs from ``self.axis``
        :raises ValueError: if the number of columns of an input does not
            match the size of ``self.VI``
        """
        assert self.VI is not None, \
            "Mahalanobis distance must be initialized with the fit() method."

        first = _orange_to_numpy(e1)
        second = _orange_to_numpy(e2)

        assert axis is None or axis == self.axis, \
            "Axis must match its value at initialization."

        # Column-wise distances are computed on the transposed inputs.
        if self.axis == 0:
            first = first.T
            second = None if second is None else second.T

        expected = self.VI.shape[0]
        if first.shape[1] != expected or \
                (second is not None and second.shape[1] != expected):
            raise ValueError('Incorrect number of features.')

        values = skl_metrics.pairwise.pairwise_distances(
            first, second, metric='mahalanobis', VI=self.VI)
        if np.isnan(values).any() and impute:
            values = np.nan_to_num(values)

        # Only Orange inputs carry items that can be attached to the result.
        if isinstance(e1, (data.Table, data.RowInstance)):
            return DistMatrix(values, e1, e2, self.axis)
        return DistMatrix(values)
Exemplo n.º 2
0
    def __call__(self, e1, e2=None, axis=1, **kwargs):
        """
        Compute pairwise distances with the metric named by ``self.metric``.

        :param e1: input data instances
        :type e1: :class:`Orange.data.Table` or :class:`Orange.data.RowInstance` or :class:`numpy.ndarray`
        :param e2: optional second set of data instances; when given,
           distances are computed between items of e1 and items of e2
        :type e2: :class:`Orange.data.Table` or :class:`Orange.data.RowInstance` or :class:`numpy.ndarray`
        :param axis: 1 computes distances between rows, 0 between columns
           (the inputs are transposed first)
        :type axis: int
        :param kwargs: forwarded unchanged to ``pairwise_distances``; used
           for Mahalanobis to pass the inverse of the covariance matrix
        :return: the matrix with distances between given examples
        :rtype: :class:`Orange.misc.DistMatrix`
        """
        def as_2d(array):
            # Sparse input is passed through untouched; dense input is
            # promoted to at least two dimensions.
            return array if sparse.issparse(array) else np.atleast_2d(array)

        first, second = _orange_to_numpy(e1), _orange_to_numpy(e2)
        if axis == 0:
            first = first.T
            second = None if second is None else second.T
        first = as_2d(first)
        if e2 is not None:
            second = as_2d(second)

        distances = metrics.pairwise.pairwise_distances(
            first, second, metric=self.metric, **kwargs)
        if isinstance(e1, (data.Table, data.RowInstance)):
            return DistMatrix(distances, e1, e2)
        return DistMatrix(distances)
Exemplo n.º 3
0
 def __call__(self, e1, e2=None, axis=1, impute=False):
     """
     Return Spearman dissimilarities, ``(1 - rho) / 2``, as a ``DistMatrix``.

     When ``self.absolute`` is true, ``(1 - |rho|) / 2`` is used instead.

     :param e1: input data; converted with ``_orange_to_numpy``
     :param e2: optional second data argument; ``e1`` is used when omitted
     :param axis: passed to ``stats.spearmanr``; forced to 0 when either
        converted input is one-dimensional
     :param impute: if true, NaN correlations are replaced with
        ``np.nan_to_num``
     :return: the dissimilarities wrapped in a ``DistMatrix``
     """
     x1 = _orange_to_numpy(e1)
     x2 = _orange_to_numpy(e2)
     if x2 is None:
         x2 = x1
     # slc is the offset used below to cut the x1-versus-x2 block out of
     # the matrix returned by spearmanr.
     if x1.ndim == 1 or x2.ndim == 1:
         axis = 0
         slc = len(x1) if x1.ndim > 1 else 1
     else:
         slc = len(x1) if axis == 1 else x1.shape[1]
     # stats.spearmanr does not work when e1=Table and e2=RowInstance
     # so we replace e1 and e2 and then transpose the result
     transpose = False
     if x1.ndim == 2 and x2.ndim == 1:
         x1, x2 = x2, x1
         slc = len(e1) if x1.ndim > 1 else 1
         transpose = True
     rho, _ = stats.spearmanr(x1, x2, axis=axis)
     if np.isnan(rho).any() and impute:
         rho = np.nan_to_num(rho)
     if self.absolute:
         dist = (1. - np.abs(rho)) / 2.
     else:
         dist = (1. - rho) / 2.
     # np.float was only an alias of the builtin float and has been removed
     # from NumPy; the builtin check is equivalent (np.float64 subclasses
     # float) and works on every NumPy version.
     if isinstance(dist, float):
         dist = np.array([[dist]])
     elif isinstance(dist, np.ndarray):
         dist = dist[:slc, slc:]
     if transpose:
         dist = dist.T
     if isinstance(e1, (data.Table, data.RowInstance)):
         dist = DistMatrix(dist, e1, e2, axis)
     else:
         dist = DistMatrix(dist)
     return dist
Exemplo n.º 4
0
    def test_numpy_type(self):
        """GH-3658: reductions yield numpy scalars; comparison still works."""
        first = DistMatrix(np.array([1, 2], dtype=np.int64))
        second = DistMatrix(np.array([2, 3], dtype=np.int64))

        # max() must return a numpy integer, not a builtin int.
        self.assertIsInstance(first.max(), np.int64)
        self.assertNotIsInstance(first.max(), int)

        # The two matrices differ, so the numpy comparison has to fail.
        self.assertRaises(
            AssertionError, np.testing.assert_array_equal, first, second)
Exemplo n.º 5
0
    def __call__(self, e1, e2=None, axis=1):
        """
        :param e1: input data instances
        :type e1: :class:`Orange.data.Table` or :class:`Orange.data.RowInstance` or :class:`numpy.ndarray`
        :param e2: optional second argument for data instances
           if provided, distances between each pair, where first item is from e1 and second is from e2, are calculated
        :type e2: :class:`Orange.data.Table` or :class:`Orange.data.RowInstance` or :class:`numpy.ndarray`
        :param axis: if axis=1 we calculate distances between rows,
           if axis=0 we calculate distances between columns
        :type axis: int
        :return: the matrix with distances between given examples
        :rtype: :class:`Orange.misc.DistMatrix`

        Returns Spearman's dissimilarity between e1 and e2,
        i.e.

        .. math:: (1-r)/2

        where r is Spearman's rank coefficient. If ``self.absolute`` is
        true, the absolute value of r is used in the formula.
        """
        x1 = _orange_to_numpy(e1)
        x2 = _orange_to_numpy(e2)
        if x2 is None:
            x2 = x1
        # slc is the offset used below to cut the x1-versus-x2 block out of
        # the matrix returned by spearmanr.
        if x1.ndim == 1 or x2.ndim == 1:
            axis = 0
            slc = len(x1) if x1.ndim > 1 else 1
        else:
            slc = len(x1) if axis == 1 else x1.shape[1]
        # stats.spearmanr does not work when e1=Table and e2=RowInstance
        # so we replace e1 and e2 and then transpose the result
        transpose = False
        if x1.ndim == 2 and x2.ndim == 1:
            x1, x2 = x2, x1
            slc = len(e1) if x1.ndim > 1 else 1
            transpose = True
        rho, _ = stats.spearmanr(x1, x2, axis=axis)
        if self.absolute:
            dist = (1. - np.abs(rho)) / 2.
        else:
            dist = (1. - rho) / 2.
        # np.float was only an alias of the builtin float and has been
        # removed from NumPy; the builtin check is equivalent (np.float64
        # subclasses float) and works on every NumPy version.
        if isinstance(dist, float):
            dist = np.array([[dist]])
        elif isinstance(dist, np.ndarray):
            dist = dist[:slc, slc:]
        if transpose:
            dist = dist.T
        if isinstance(e1, (data.Table, data.RowInstance)):
            dist = DistMatrix(dist, e1, e2)
        else:
            dist = DistMatrix(dist)
        return dist
Exemplo n.º 6
0
 def __call__(self, e1, e2=None, axis=1, impute=False):
     """
     Compute pairwise distances with the metric named by ``self.metric``.

     :param e1: input data; converted with ``_orange_to_numpy``
     :param e2: optional second data argument
     :param axis: 1 computes distances between rows, 0 between columns
        (the inputs are transposed first)
     :param impute: if true, NaNs in the computed distances are replaced
        with ``np.nan_to_num``; previously this flag was accepted but
        silently ignored
     :return: the distances wrapped in a ``DistMatrix``
     """
     # Function-scope import so the block does not depend on a module-level
     # alias that the original body never used.
     import numpy as np

     x1 = _orange_to_numpy(e1)
     x2 = _orange_to_numpy(e2)
     if axis == 0:
         x1 = x1.T
         if x2 is not None:
             x2 = x2.T
     dist = skl_metrics.pairwise.pairwise_distances(
         x1, x2, metric=self.metric)
     if impute and np.isnan(dist).any():
         dist = np.nan_to_num(dist)
     if isinstance(e1, (data.Table, data.RowInstance)):
         dist = DistMatrix(dist, e1, e2, axis)
     else:
         dist = DistMatrix(dist)
     return dist
Exemplo n.º 7
0
 def __call__(self, e1, e2=None, axis=1, impute=False):
     """
     Compute pairwise distances with the metric named by ``self.metric``.

     :param e1: input data; converted with ``_orange_to_numpy``
     :param e2: optional second data argument
     :param axis: 1 computes distances between rows, 0 between columns
        (the inputs are transposed first)
     :param impute: if true, NaNs in the result are replaced with
        ``np.nan_to_num``
     :return: the distances wrapped in a ``DistMatrix``
     """
     first = _orange_to_numpy(e1)
     second = _orange_to_numpy(e2)
     if axis == 0:
         first = first.T
         second = None if second is None else second.T

     values = skl_metrics.pairwise.pairwise_distances(
         first, second, metric=self.metric)
     if impute and np.isnan(values).any():
         values = np.nan_to_num(values)

     # Plain arrays carry no items, so they get a bare matrix.
     if not isinstance(e1, (Table, RowInstance)):
         return DistMatrix(values)
     return DistMatrix(values, e1, e2, axis)
Exemplo n.º 8
0
 def test_deprecated(self):
     """Accessing ``dim`` and ``X`` must emit OrangeDeprecationWarning."""
     square = np.arange(9).reshape(3, 3)
     matrix = DistMatrix(square)

     with self.assertWarns(OrangeDeprecationWarning):
         dimension = matrix.dim
         self.assertEqual(dimension, 3)

     with self.assertWarns(OrangeDeprecationWarning):
         values = matrix.X
         np.testing.assert_almost_equal(values, square)
 def setUp(self):
     """Create the OWMDS widget and load the towns distance matrix."""
     settings = {
         "__version__": 2,
         "max_iter": 10,
         "initialization": OWMDS.PCA,
     }
     self.widget = self.create_widget(
         OWMDS, stored_settings=settings)  # type: OWMDS

     towns_path = os.path.join(self.datasets_dir, "slovenian-towns.dst")
     self.towns = DistMatrix.from_file(towns_path)
Exemplo n.º 10
0
 def test_input_distance_no_data(self):
     """A matrix without row items is rejected; clearing input resets it."""
     w = self.widget
     validation_error = w.Error.input_validation_error

     values = np.array([[0, 1, 1], [1, 0, 1], [1, 1, 0]])
     unlabelled = DistMatrix(values, row_items=None)

     # Sending the unlabelled matrix shows the error and stores nothing.
     self.send_signal(w.Inputs.data, unlabelled, widget=w)
     self.assertTrue(validation_error.is_shown())
     self.assertIsNone(w.data)
     self.assertIsNone(w.distances)

     # Removing the input clears the error again.
     self.send_signal(w.Inputs.data, None, widget=w)
     self.assertFalse(validation_error.is_shown())
Exemplo n.º 11
0
 def setUp(self):
     """Build the OWMDS widget under test and read the towns distances."""
     stored = dict([
         ("__version__", 2),
         ("max_iter", 10),
         ("initialization", OWMDS.PCA),
     ])
     self.widget = self.create_widget(
         OWMDS, stored_settings=stored)  # type: OWMDS

     dst_file = os.path.join(self.datasets_dir, "slovenian-towns.dst")
     self.towns = DistMatrix.from_file(dst_file)
Exemplo n.º 12
0
 def __call__(self, e1, e2=None, axis=1):
     """
     Compute pairwise distances with the metric named by ``self.metric``.

     :param e1: input data; converted with ``_orange_to_numpy``
     :param e2: optional second data argument
     :param axis: 1 computes distances between rows, 0 between columns
        (the inputs are transposed first)
     :return: the distances wrapped in a ``DistMatrix``
     """
     def as_2d(array):
         # Sparse input is passed through untouched; dense input is
         # promoted to at least two dimensions.
         return array if sparse.issparse(array) else np.atleast_2d(array)

     first, second = _orange_to_numpy(e1), _orange_to_numpy(e2)
     if axis == 0:
         first = first.T
         second = None if second is None else second.T
     first = as_2d(first)
     if e2 is not None:
         second = as_2d(second)

     values = skl_metrics.pairwise.pairwise_distances(
         first, second, metric=self.metric)
     if isinstance(e1, (data.Table, data.RowInstance)):
         return DistMatrix(values, e1, e2, axis)
     return DistMatrix(values)
Exemplo n.º 13
0
    def _save_and_load(self):
        """
        Save ``self.distances`` through the widget and read the file back.

        The widget's file dialog is replaced by a mock that returns a
        temporary ``.dst`` file together with the widget's first filter.

        :return: the ``DistMatrix`` loaded from the saved file
        """
        w = self.widget
        w.auto_save = False

        with named_file("", suffix=".dst") as path:
            dialog_result = (path, w.filters[0])
            w.get_save_filename = Mock(return_value=dialog_result)

            self.send_signal(w.Inputs.distances, self.distances)
            w.save_file_as()
            loaded = DistMatrix.from_file(path)
            return loaded
Exemplo n.º 14
0
 def __call__(self, e1, e2=None, axis=1, impute=False):
     """
     Return Pearson dissimilarities, ``(1 - r) / 2``, as a ``DistMatrix``.

     When ``self.absolute`` is true, ``(1 - |r|) / 2`` is used instead.

     :param e1: input data; converted with ``_orange_to_numpy``
     :param e2: optional second data argument; ``e1`` is used when omitted
     :param axis: 0 transposes both inputs before correlating
     :param impute: if true, NaN correlations are replaced with
        ``np.nan_to_num``
     :return: the dissimilarities wrapped in a ``DistMatrix``
     """
     first = _orange_to_numpy(e1)
     second = _orange_to_numpy(e2)
     if second is None:
         second = first
     if axis == 0:
         first, second = first.T, second.T

     # One correlation coefficient per (item of first, item of second) pair.
     table = []
     for left in first:
         table.append([stats.pearsonr(left, right)[0] for right in second])
     rho = np.array(table)
     if np.isnan(rho).any() and impute:
         rho = np.nan_to_num(rho)

     strength = np.abs(rho) if self.absolute else rho
     values = (1. - strength) / 2.
     if isinstance(e1, (data.Table, data.RowInstance)):
         return DistMatrix(values, e1, e2, axis)
     return DistMatrix(values)
Exemplo n.º 15
0
    def __call__(self, e1, e2=None, axis=1):
        """
        Return Pearson's dissimilarity between e1 and e2, i.e.

        .. math:: (1-r)/2

        where r is Pearson's correlation coefficient. If ``self.absolute``
        is true, the absolute value of r is used in the formula.

        :param e1: input data instances
        :type e1: :class:`Orange.data.Table` or :class:`Orange.data.RowInstance` or :class:`numpy.ndarray`
        :param e2: optional second set of data instances; when given,
           distances are computed between items of e1 and items of e2
        :type e2: :class:`Orange.data.Table` or :class:`Orange.data.RowInstance` or :class:`numpy.ndarray`
        :param axis: 1 computes distances between rows, 0 between columns
        :type axis: int
        :return: the matrix with distances between given examples
        :rtype: :class:`Orange.misc.DistMatrix`
        """
        first = _orange_to_numpy(e1)
        second = _orange_to_numpy(e2)
        if second is None:
            second = first
        if axis == 0:
            first, second = first.T, second.T
        # A one-dimensional input is treated as a single item.
        if first.ndim == 1:
            first = [first]
        if second.ndim == 1:
            second = [second]

        table = []
        for left in first:
            table.append([stats.pearsonr(left, right)[0] for right in second])
        rho = np.array(table)

        strength = np.abs(rho) if self.absolute else rho
        values = (1. - strength) / 2.
        if isinstance(e1, (data.Table, data.RowInstance)):
            return DistMatrix(values, e1, e2)
        return DistMatrix(values)
Exemplo n.º 16
0
 def __call__(self, e1, e2=None, axis=1):
     """
     Return Pearson dissimilarities, ``(1 - r) / 2``, as a ``DistMatrix``.

     When ``self.absolute`` is true, ``(1 - |r|) / 2`` is used instead.

     :param e1: input data; converted with ``_orange_to_numpy``
     :param e2: optional second data argument; ``e1`` is used when omitted
     :param axis: 0 transposes both inputs before correlating
     :return: the dissimilarities wrapped in a ``DistMatrix``
     """
     first = _orange_to_numpy(e1)
     second = _orange_to_numpy(e2)
     if second is None:
         second = first
     if axis == 0:
         first, second = first.T, second.T
     # A one-dimensional input is treated as a single item.
     if first.ndim == 1:
         first = [first]
     if second.ndim == 1:
         second = [second]

     def correlations_with(item):
         # Pearson coefficient of ``item`` against every item of ``second``.
         return [stats.pearsonr(item, other)[0] for other in second]

     rho = np.array([correlations_with(item) for item in first])
     strength = np.abs(rho) if self.absolute else rho
     values = (1. - strength) / 2.
     if isinstance(e1, (data.Table, data.RowInstance)):
         return DistMatrix(values, e1, e2, axis)
     return DistMatrix(values)
Exemplo n.º 17
0
 def __call__(self, e1, e2=None, axis=1, impute=False):
     """
     Return Spearman dissimilarities, ``(1 - rho) / 2``, as a ``DistMatrix``.

     When ``self.absolute`` is true, ``(1 - |rho|) / 2`` is used instead.

     :param e1: input data; converted with ``_orange_to_numpy``
     :param e2: optional second data argument; ``e1`` is used when omitted
     :param axis: passed to ``stats.spearmanr``
     :param impute: if true, NaN correlations are replaced with
        ``np.nan_to_num``
     :return: the dissimilarities wrapped in a ``DistMatrix``
     """
     x1 = _orange_to_numpy(e1)
     x2 = _orange_to_numpy(e2)
     if x2 is None:
         x2 = x1
     # slc is the offset used below to cut the x1-versus-x2 block out of
     # the matrix returned by spearmanr.
     slc = len(x1) if axis == 1 else x1.shape[1]
     rho, _ = stats.spearmanr(x1, x2, axis=axis)
     if np.isnan(rho).any() and impute:
         rho = np.nan_to_num(rho)
     if self.absolute:
         dist = (1. - np.abs(rho)) / 2.
     else:
         dist = (1. - rho) / 2.
     # np.float was only an alias of the builtin float and has been removed
     # from NumPy; the builtin check is equivalent (np.float64 subclasses
     # float) and works on every NumPy version.
     if isinstance(dist, float):
         dist = np.array([[dist]])
     elif isinstance(dist, np.ndarray):
         dist = dist[:slc, slc:]
     if isinstance(e1, (data.Table, data.RowInstance)):
         dist = DistMatrix(dist, e1, e2, axis)
     else:
         dist = DistMatrix(dist)
     return dist
Exemplo n.º 18
0
    def test_save(self):
        """Check that matrices survive a ``save`` / ``from_file`` round trip."""

        def labels(items):
            # Labels are compared through the first meta value of each item.
            return [item.metas[0] for item in items]

        # File with row labels only, axis=1.
        with named_file(
            """3 axis=1 row_labels
                danny	0.12	3.45	6.78
                eve 	9.01	2.34	5.67
                frank	8.90""") as name:
            DistMatrix.from_file(name).save(name)
            reloaded = DistMatrix.from_file(name)
            expected = np.array([[0.12, 9.01, 8.90],
                                 [9.01, 2.34, 0],
                                 [8.90, 0, 0]])
            np.testing.assert_almost_equal(reloaded, expected)
            self.assertIsInstance(reloaded.row_items, Table)
            self.assertIsNone(reloaded.col_items)
            self.assertEqual(labels(reloaded.row_items),
                             ["danny", "eve", "frank"])
            self.assertEqual(reloaded.axis, 1)

        # Asymmetric file with both column and row labels, axis=0.
        with named_file(
            """3 axis=0 asymmetric col_labels row_labels
                         ann	bert	chad
                danny	0.12	3.45	6.78
                  eve	9.01	2.34	5.67
                frank	8.90	1.23	4.56""") as name:
            DistMatrix.from_file(name).save(name)
            reloaded = DistMatrix.from_file(name)
            expected = np.array([[0.12, 3.45, 6.78],
                                 [9.01, 2.34, 5.67],
                                 [8.90, 1.23, 4.56]])
            np.testing.assert_almost_equal(reloaded, expected)
            self.assertIsInstance(reloaded.row_items, Table)
            self.assertIsInstance(reloaded.col_items, Table)
            self.assertEqual(labels(reloaded.col_items),
                             ["ann", "bert", "chad"])
            self.assertEqual(labels(reloaded.row_items),
                             ["danny", "eve", "frank"])
            self.assertEqual(reloaded.axis, 0)
Exemplo n.º 19
0
    def __call__(self, e1, e2=None):
        """
        If e2 is omitted, calculate distances between all rows (axis=1) or
        columns (axis=0) of e1. If e2 is present, calculate distances between
        all pairs of rows from e1 and e2.

        This method converts the data into numpy arrays, calls the method
        `compute_distances` and packs the result into `DistMatrix`.
        Subclasses are expected to provide `compute_distances` rather than
        override `__call__`.

        Args:
            e1 (Orange.data.Table or Orange.data.Instance or numpy.ndarray):
                input data
            e2 (Orange.data.Table or Orange.data.Instance or numpy.ndarray):
                secondary data

        Returns:
            A distance matrix (Orange.misc.distmatrix.DistMatrix)

        Raises:
            ValueError: if `self.axis` is 0 and `e2` is an object other
                than `e1`
        """
        if self.axis == 0 and e2 is not None:
            # Backward compatibility fix: passing the same object twice is
            # treated as if only e1 had been given.
            if e2 is not e1:
                raise ValueError("Two tables cannot be compared by columns")
            e2 = None

        first, second = _orange_to_numpy(e1), _orange_to_numpy(e2)
        with np.errstate(invalid="ignore"):  # nans are handled below
            result = self.compute_distances(first, second)
            if self.impute and np.isnan(result).any():
                result = np.nan_to_num(result)
            # Plain arrays carry no items, so they get a bare matrix.
            if not isinstance(e1, (Table, RowInstance)):
                return DistMatrix(result)
            return DistMatrix(result, e1, e2, self.axis)
Exemplo n.º 20
0
    def test_save(self):
        """Save a loaded matrix over its own file and verify the reload."""

        def round_trip(path):
            # Load, write back to the same file, and load again.
            DistMatrix.from_file(path).save(path)
            return DistMatrix.from_file(path)

        def first_metas(items):
            return [item.metas[0] for item in items]

        # File with row labels only, axis=1.
        with named_file(
            """3 axis=1 row_labels
                danny	0.12	3.45	6.78
                eve 	9.01	2.34	5.67
                frank	8.90""") as name:
            matrix = round_trip(name)
            np.testing.assert_almost_equal(
                matrix,
                np.array([[0.12, 9.01, 8.90],
                          [9.01, 2.34, 0],
                          [8.90, 0, 0]]))
            self.assertIsInstance(matrix.row_items, Table)
            self.assertIsNone(matrix.col_items)
            self.assertEqual(first_metas(matrix.row_items),
                             ["danny", "eve", "frank"])
            self.assertEqual(matrix.axis, 1)

        # Asymmetric file with both column and row labels, axis=0.
        with named_file(
            """3 axis=0 asymmetric col_labels row_labels
                         ann	bert	chad
                danny	0.12	3.45	6.78
                  eve	9.01	2.34	5.67
                frank	8.90	1.23	4.56""") as name:
            matrix = round_trip(name)
            np.testing.assert_almost_equal(
                matrix,
                np.array([[0.12, 3.45, 6.78],
                          [9.01, 2.34, 5.67],
                          [8.90, 1.23, 4.56]]))
            self.assertIsInstance(matrix.row_items, Table)
            self.assertIsInstance(matrix.col_items, Table)
            self.assertEqual(first_metas(matrix.col_items),
                             ["ann", "bert", "chad"])
            self.assertEqual(first_metas(matrix.row_items),
                             ["danny", "eve", "frank"])
            self.assertEqual(matrix.axis, 0)
Exemplo n.º 21
0
    def compute_distances(self, data):
        """
        Compute a mixed-type distance between every pair of rows of `data`.

        For two rows, each continuous column contributes the squared
        difference of the two values divided by the column's range
        (max - min over all rows); columns with zero range are skipped.
        Every other column contributes 1 when the two values differ.
        The distance is

            (sqrt(sum of continuous terms) + number of mismatches)
            / number of columns in the domain

        Args:
            data: a table supporting `len(data)`, `data.domain` (indexable
                by position) and `data[row][variable]`.

        Returns:
            DistMatrix wrapping the symmetric matrix of pairwise distances.
        """
        nb_data = len(data)
        nb_domain = len(data.domain)
        variables = [data.domain[k] for k in range(nb_domain)]
        rows = [data[i] for i in range(nb_data)]

        # Range (max - min) of every continuous column, keyed by the
        # column's position in the domain. Keying by position keeps the
        # lookup correct when discrete columns are interleaved, and taking
        # min/max over all rows includes the first row in the maximum.
        ranges = {}
        if nb_data:
            for k, variable in enumerate(variables):
                if isinstance(variable, ContinuousVariable):
                    column = [row[variable] for row in rows]
                    ranges[k] = max(column) - min(column)

        distances = [[0.0] * nb_data for _ in range(nb_data)]
        for i in range(nb_data):
            row_i = rows[i]
            for j in range(i, nb_data):
                row_j = rows[j]
                # Sum of squared, range-normalised differences over the
                # continuous columns.
                sum_continuous_values = 0
                # Number of non-continuous columns whose values differ.
                sum_discrete_values = 0

                for k, variable in enumerate(variables):
                    if k in ranges:
                        if ranges[k] != 0:
                            sum_continuous_values += (
                                (row_i[variable] - row_j[variable]) /
                                ranges[k]) ** 2
                    elif row_i[variable] != row_j[variable]:
                        sum_discrete_values += 1

                distance = (sqrt(sum_continuous_values) +
                            sum_discrete_values) / nb_domain
                # The measure is symmetric, so fill both halves at once.
                distances[i][j] = distances[j][i] = distance

        return DistMatrix(np.array(distances))
Exemplo n.º 22
0
    def open_file(self):
        """
        Load the distance matrix at the most recent path and send it on.

        If the path does not exist but a file with the same base name is
        found in the current directory, that file is loaded instead. The
        special path "(none)" sends ``None``. Loading errors are reported
        through the widget's error message and labels, and ``None`` is sent.
        """
        # Reset messages left over from a previous load.
        self.error()
        self.warning()
        self.information()

        path = self.last_path()
        if not path:
            return
        if not os.path.exists(path):
            basename = os.path.basename(path)
            fallback = os.path.join(".", basename)
            if os.path.exists(fallback):
                path = fallback
                self.information("Loading '{}' from the current directory."
                                 .format(basename))
        if path == "(none)":
            self.send("Distances", None)
            self.infoa.setText("No data loaded")
            self.infob.setText("")
            self.warnings.setText("")
            return

        self.loaded_file = ""

        try:
            distances = DistMatrix.from_file(path)
            self.loaded_file = path
        except Exception as exc:
            # UI boundary: any failure is shown to the user, not raised.
            err_value = str(exc)
            self.error("Invalid file format")
            self.infoa.setText('Data was not loaded due to an error.')
            self.warnings.setText(err_value)
            distances = None

        if distances is not None:
            if distances.row_items is not None:
                labelling = "labelled"
            else:
                labelling = "unlabelled"
            self.infoa.setText(
                "{} points(s), ".format(len(distances)) + labelling)
            self.warnings.setText("")
            # Name the matrix after the file, without its last extension.
            file_name = os.path.basename(path)
            stem, dot, _ = file_name.rpartition(".")
            distances.name = stem if dot else file_name

        self.send("Distances", distances)
Exemplo n.º 23
0
    def open_file(self):
        """
        Load the distance matrix at the most recent path and send it on.

        If the path does not exist but a file with the same base name is
        found in the current directory, that file is loaded instead. The
        special path "(none)" sends ``None``. Loading errors are reported
        through the widget's error message and labels, and ``None`` is sent.
        """
        self.clear_messages()
        path = self.last_path()
        if not path:
            return
        if not os.path.exists(path):
            basename = os.path.basename(path)
            fallback = os.path.join(".", basename)
            if os.path.exists(fallback):
                path = fallback
                self.information(
                    "Loading '{}' from the current directory.".format(
                        basename))
        if path == "(none)":
            self.Outputs.distances.send(None)
            self.infoa.setText("No data loaded")
            self.infob.setText("")
            self.warnings.setText("")
            return

        self.loaded_file = ""

        try:
            distances = DistMatrix.from_file(path)
            self.loaded_file = path
        except Exception as exc:
            # UI boundary: any failure is shown to the user, not raised.
            err_value = str(exc)
            self.error("Invalid file format")
            self.infoa.setText('错误,数据未加载。')
            self.warnings.setText(err_value)
            distances = None

        if distances is not None:
            if distances.row_items is not None:
                labelling = "已标记"
            else:
                labelling = "未标记"
            self.infoa.setText("{}  个点, ".format(len(distances)) + labelling)
            self.warnings.setText("")
            # Name the matrix after the file, without its last extension.
            file_name = os.path.basename(path)
            stem, dot, _ = file_name.rpartition(".")
            distances.name = stem if dot else file_name

        self.Outputs.distances.send(distances)
    def setUp(self):
        """Create the widget, Iris distances and a small hand-built matrix."""
        self.widget = self.create_widget(
            OWNxFromDistances)  # type: OWNxFromDistances
        self.data = Table("iris")
        self.distances = Euclidean(self.data)

        # When converted to a graph, this has the following components:
        # At threshold 0.5:  {1, 6} and disconnected {0}, {2}, {3}, {4}, {5}
        # At threshold 1 {0, 1, 2, 6}, {3, 5}, {4}
        # At threshold 2 {0, 1, 2, 3, 5, 6}, {4}
        symmetric_entries = (
            (0.5, [(1, 6)]),
            (1, [(0, 1), (1, 2), (2, 6), (0, 6), (3, 5)]),
            (2, [(2, 3)]),
        )
        # Every pair not listed above keeps the default distance of 10.
        matrix = np.full((7, 7), 10.0)
        for value, pairs in symmetric_entries:
            for row, col in pairs:
                matrix[row, col] = matrix[col, row] = value
        self.distances1 = DistMatrix(matrix)
Exemplo n.º 25
0
    def test_from_file(self):
        """
        Check ``DistMatrix.from_file`` on well-formed and malformed files.

        The first part loads several valid files and verifies the values,
        the label tables and the axis. The second part feeds malformed
        content and checks the message of the raised ``ValueError``.
        """
        # Asymmetric matrix with both column and row labels, axis=0.
        with named_file("""3 axis=0 asymmetric col_labels row_labels
                    ann	bert	chad
                    danny	0.12	3.45	6.78
                    eve	9.01	2.34	5.67
                    frank	8.90	1.23	4.56""") as name:
            m = DistMatrix.from_file(name)
            np.testing.assert_almost_equal(
                m,
                np.array([[0.12, 3.45, 6.78], [9.01, 2.34, 5.67],
                          [8.90, 1.23, 4.56]]))
            self.assertIsInstance(m.row_items, Table)
            self.assertIsInstance(m.col_items, Table)
            self.assertEqual([e.metas[0] for e in m.col_items],
                             ["ann", "bert", "chad"])
            self.assertEqual([e.metas[0] for e in m.row_items],
                             ["danny", "eve", "frank"])
            self.assertEqual(m.axis, 0)

        # Row labels only, axis=1.
        with named_file("""3 axis=1 row_labels
                    danny	0.12	3.45	6.78
                    eve 	9.01	2.34	5.67
                    frank	8.90""") as name:
            m = DistMatrix.from_file(name)
            np.testing.assert_almost_equal(
                m, np.array([[0.12, 9.01, 8.90], [9.01, 2.34, 0], [8.90, 0,
                                                                   0]]))
            self.assertIsInstance(m.row_items, Table)
            self.assertIsNone(m.col_items)
            self.assertEqual([e.metas[0] for e in m.row_items],
                             ["danny", "eve", "frank"])
            self.assertEqual(m.axis, 1)

        # Explicitly symmetric matrix without labels.
        with named_file("""3 axis=1 symmetric
                0.12	3.45	6.78
                9.01	2.34	5.67
                8.90""") as name:
            m = DistMatrix.from_file(name)
        np.testing.assert_almost_equal(
            m, np.array([[0.12, 9.01, 8.90], [9.01, 2.34, 0], [8.90, 0, 0]]))

        # Non-ASCII row labels written as UTF-8. The encoding argument used
        # to carry a stray empty string literal concatenated onto it.
        with named_file("""3 row_labels
                starič	0.12	3.45	6.78
                aleš	9.01	2.34	5.67
                anže	8.90""",
                        encoding="utf-8") as name:
            m = DistMatrix.from_file(name)
            np.testing.assert_almost_equal(
                m, np.array([[0.12, 9.01, 8.90], [9.01, 2.34, 0], [8.90, 0,
                                                                   0]]))
            self.assertIsInstance(m.row_items, Table)
            self.assertIsNone(m.col_items)
            self.assertEqual([e.metas[0] for e in m.row_items],
                             ["starič", "aleš", "anže"])
            self.assertEqual(m.axis, 1)

        def assertErrorMsg(content, msg):
            # Loading `content` must raise ValueError with exactly `msg`.
            with named_file(content) as name:
                with self.assertRaises(ValueError) as cm:
                    DistMatrix.from_file(name)
                self.assertEqual(str(cm.exception), msg)

        assertErrorMsg("", "empty file")
        assertErrorMsg("axis=1\n1\t3\n4",
                       "distance file must begin with dimension")
        # NOTE(review): "\2" below is an octal escape (chr(2)), possibly a
        # typo for "\n2"; left unchanged so the fixtures stay as they were.
        assertErrorMsg("3 col_labels\na\tb\n1\n\2\n3",
                       "mismatching number of column labels")
        assertErrorMsg("3 col_labels\na\tb\tc\td\n1\n\2\n3",
                       "mismatching number of column labels")
        assertErrorMsg("2\n  1\t2\t3\n  5", "too many columns in matrix row 1")
        assertErrorMsg("2 row_labels\na\t1\t2\t3\nb\t5",
                       "too many columns in matrix row 'a'")
        assertErrorMsg("2 noflag\n  1\t2\t3\n  5", "invalid flag 'noflag'")
        assertErrorMsg("2 noflag=5\n  1\t2\t3\n  5", "invalid flag 'noflag=5'")
        assertErrorMsg("2\n1\n2\n3", "too many rows")
        assertErrorMsg("2\n1\nasd", "invalid element at row 2, column 1")
        assertErrorMsg("2 row_labels\na\t1\nb\tasd",
                       "invalid element at row 'b', column 1")
        assertErrorMsg("2 col_labels row_labels\nd\te\na\t1\nb\tasd",
                       "invalid element at row 'b', column 'd'")
        assertErrorMsg("2 col_labels\nd\te\n1\nasd",
                       "invalid element at row 2, column 'd'")
Exemplo n.º 26
0
 def assertErrorMsg(content, msg):
     """Assert that loading `content` raises ValueError with text `msg`."""
     with named_file(content) as path:
         with self.assertRaises(ValueError) as caught:
             DistMatrix.from_file(path)
         actual = str(caught.exception)
         self.assertEqual(actual, msg)
Exemplo n.º 27
0
    def test_from_file(self):
        """
        Check ``DistMatrix.from_file`` on well-formed and malformed files.

        The first part loads several valid files and verifies the values,
        the label tables and the axis. The second part feeds malformed
        content and checks the message of the raised ``ValueError``.
        """
        # Asymmetric matrix with both column and row labels, axis=0.
        with named_file(
                """3 axis=0 asymmetric col_labels row_labels
                    ann	bert	chad
                    danny	0.12	3.45	6.78
                    eve	9.01	2.34	5.67
                    frank	8.90	1.23	4.56""") as name:
            m = DistMatrix.from_file(name)
            np.testing.assert_almost_equal(m, np.array([[0.12, 3.45, 6.78],
                                                        [9.01, 2.34, 5.67],
                                                        [8.90, 1.23, 4.56]]))
            self.assertIsInstance(m.row_items, Table)
            self.assertIsInstance(m.col_items, Table)
            self.assertEqual([e.metas[0] for e in m.col_items],
                             ["ann", "bert", "chad"])
            self.assertEqual([e.metas[0] for e in m.row_items],
                             ["danny", "eve", "frank"])
            self.assertEqual(m.axis, 0)

        # Row labels only, axis=1.
        with named_file(
                """3 axis=1 row_labels
                    danny	0.12	3.45	6.78
                    eve 	9.01	2.34	5.67
                    frank	8.90""") as name:
            m = DistMatrix.from_file(name)
            np.testing.assert_almost_equal(m, np.array([[0.12, 9.01, 8.90],
                                                        [9.01, 2.34, 0],
                                                        [8.90, 0, 0]]))
            self.assertIsInstance(m.row_items, Table)
            self.assertIsNone(m.col_items)
            self.assertEqual([e.metas[0] for e in m.row_items],
                             ["danny", "eve", "frank"])
            self.assertEqual(m.axis, 1)

        # Explicitly symmetric matrix without labels.
        with named_file(
            """3 axis=1 symmetric
                0.12	3.45	6.78
                9.01	2.34	5.67
                8.90""") as name:
            m = DistMatrix.from_file(name)
        np.testing.assert_almost_equal(m, np.array([[0.12, 9.01, 8.90],
                                                    [9.01, 2.34, 0],
                                                    [8.90, 0, 0]]))

        # Non-ASCII row labels written as UTF-8. The encoding argument used
        # to carry a stray empty string literal concatenated onto it.
        with named_file(
            """3 row_labels
                starič	0.12	3.45	6.78
                aleš	9.01	2.34	5.67
                anže	8.90""", encoding="utf-8") as name:
            m = DistMatrix.from_file(name)
            np.testing.assert_almost_equal(m, np.array([[0.12, 9.01, 8.90],
                                                        [9.01, 2.34, 0],
                                                        [8.90, 0, 0]]))
            self.assertIsInstance(m.row_items, Table)
            self.assertIsNone(m.col_items)
            self.assertEqual([e.metas[0] for e in m.row_items],
                             ["starič", "aleš", "anže"])
            self.assertEqual(m.axis, 1)

        def assertErrorMsg(content, msg):
            # Loading `content` must raise ValueError with exactly `msg`.
            with named_file(content) as name:
                with self.assertRaises(ValueError) as cm:
                    DistMatrix.from_file(name)
                self.assertEqual(str(cm.exception), msg)

        assertErrorMsg("",
                       "empty file")
        assertErrorMsg("axis=1\n1\t3\n4",
                       "distance file must begin with dimension")
        # NOTE(review): "\2" below is an octal escape (chr(2)), possibly a
        # typo for "\n2"; left unchanged so the fixtures stay as they were.
        assertErrorMsg("3 col_labels\na\tb\n1\n\2\n3",
                       "mismatching number of column labels")
        assertErrorMsg("3 col_labels\na\tb\tc\td\n1\n\2\n3",
                       "mismatching number of column labels")
        assertErrorMsg("2\n  1\t2\t3\n  5",
                       "too many columns in matrix row 1")
        assertErrorMsg("2 row_labels\na\t1\t2\t3\nb\t5",
                       "too many columns in matrix row 'a'")
        assertErrorMsg("2 noflag\n  1\t2\t3\n  5",
                       "invalid flag 'noflag'")
        assertErrorMsg("2 noflag=5\n  1\t2\t3\n  5",
                       "invalid flag 'noflag=5'")
        assertErrorMsg("2\n1\n2\n3",
                       "too many rows")
        assertErrorMsg("2\n1\nasd",
                       "invalid element at row 2, column 1")
        assertErrorMsg("2 row_labels\na\t1\nb\tasd",
                       "invalid element at row 'b', column 1")
        assertErrorMsg("2 col_labels row_labels\nd\te\na\t1\nb\tasd",
                       "invalid element at row 'b', column 'd'")
        assertErrorMsg("2 col_labels\nd\te\n1\nasd",
                       "invalid element at row 2, column 'd'")
Exemplo n.º 28
0
 def assertErrorMsg(content, msg):
     """Check that reading `content` fails with a ValueError saying `msg`."""
     with named_file(content) as file_path:
         with self.assertRaises(ValueError) as context:
             DistMatrix.from_file(file_path)
         raised = context.exception
         self.assertEqual(str(raised), msg)