Пример #1
0
    def test_euclidean_cols(self):
        # Unnormalized Euclidean distances computed with axis=0 on the
        # continuous fixture.  The matrix is checked on the table as given,
        # then again after cell [1, 1] and finally cell [1, 0] are set to NaN.
        table = self.cont_data

        def check(expected):
            # Recompute on the current state of the table and compare.
            computed = distance.Euclidean(table, axis=0, normalize=False)
            np.testing.assert_almost_equal(computed, expected)

        check([[0, 8.062257748, 4.242640687],
               [8.062257748, 0, 5.196152423],
               [4.242640687, 5.196152423, 0]])

        with table.unlocked():
            table.X[1, 1] = np.nan
        check([[0, 6.218252702, 4.242640687],
               [6.218252702, 0, 2.581988897],
               [4.242640687, 2.581988897, 0]])

        with table.unlocked():
            table.X[1, 0] = np.nan
        check([[0, 6.218252702, 5.830951895],
               [6.218252702, 0, 2.581988897],
               [5.830951895, 2.581988897, 0]])
Пример #2
0
    def test_euclidean_cols_normalized(self):
        # Normalized Euclidean distances computed with axis=0 on the
        # continuous fixture.  Each step optionally blanks one cell (sets it
        # to NaN) before the distance matrix is recomputed and compared.
        table = self.cont_data

        steps = [
            (None,
             [[0, 2.455273959, 0.649839392],
              [2.455273959, 0, 2.473176308],
              [0.649839392, 2.473176308, 0]]),
            ((1, 1),
             [[0, 2, 0.649839392],
              [2, 0, 1.704275472],
              [0.649839392, 1.704275472, 0]]),
            ((1, 0),
             [[0, 2, 1.450046001],
              [2, 0, 1.704275472],
              [1.450046001, 1.704275472, 0]]),
        ]

        for cell, expected in steps:
            if cell is not None:
                # Mutations accumulate: earlier NaNs stay in place.
                with table.unlocked():
                    table.X[cell] = np.nan
            computed = distance.Euclidean(table, axis=0, normalize=True)
            np.testing.assert_almost_equal(computed, expected)
Пример #3
0
    def test_euclidean_cont(self):
        # Unnormalized Euclidean distances computed with axis=1 on the
        # continuous fixture: first on the table as given, then after
        # cell [1, 0] and after cell [0, 0] are replaced by NaN.
        table = self.cont_data

        def row_distances():
            return distance.Euclidean(table, axis=1, normalize=False)

        # Expected values for the complete table are given as squared
        # distances and square-rooted here.
        squared = np.array([[0, 12, 5, 38],
                            [12, 0, 21, 82],
                            [5, 21, 0, 41],
                            [38, 82, 41, 0]])
        np.testing.assert_almost_equal(row_distances(), np.sqrt(squared))

        with table.unlocked():
            table.X[1, 0] = np.nan
        one_missing = [[0, 4.472135955, 2.236067977, 6.164414003],
                       [4.472135955, 0, 5.385164807, 6.480740698],
                       [2.236067977, 5.385164807, 0, 6.403124237],
                       [6.164414003, 6.480740698, 6.403124237, 0]]
        np.testing.assert_almost_equal(row_distances(), one_missing)

        with table.unlocked():
            table.X[0, 0] = np.nan
        two_missing = [[0, 5.099019514, 4.795831523, 4.472135955],
                       [5.099019514, 0, 5.916079783, 6],
                       [4.795831523, 5.916079783, 0, 6.403124237],
                       [4.472135955, 6, 6.403124237, 0]]
        np.testing.assert_almost_equal(row_distances(), two_missing)
    def test_two_tables(self):
        # Normalized Euclidean distances between two tables.  The one-shot
        # call and a model fitted on the first table must give the same
        # cross-table matrix; the fitted model is then applied to the
        # second table alone.
        first = self.cont_data
        second = self.cont_data2

        cross_expected = [
            [1.17040218, 0.47809144],
            [2.78516478, 1.96961039],
            [1.28668394, 0.79282497],
            [1.27179413, 1.54919334],
        ]

        direct = distance.Euclidean(first, second, normalize=True)
        np.testing.assert_almost_equal(direct, cross_expected)

        fitted = distance.Euclidean(normalize=True).fit(first)
        np.testing.assert_almost_equal(fitted(first, second), cross_expected)

        np.testing.assert_almost_equal(
            fitted(second), [[0, 0.827119692], [0.827119692, 0]])
    def test_euclidean_cont_normalized(self):
        # Normalized Euclidean distances computed with axis=1 on the
        # continuous fixture.  Checks the fitted model's statistics (means,
        # vars, dist_missing2_cont) and the resulting distance matrix, first
        # on the table as given and then after cell [1, 0] and cell [0, 0]
        # are replaced by NaN.
        assert_almost_equal = np.testing.assert_almost_equal
        data = self.cont_data

        model = distance.Euclidean(axis=1, normalize=True).fit(data)
        assert_almost_equal(model.means, [2, 2.75, 1.5])
        assert_almost_equal(model.vars, [9, 2.1875, 1.25])
        assert_almost_equal(model.dist_missing2_cont, [1, 1, 1])

        # The fitted model and the one-shot call must agree.
        complete_expected = [
            [0, 1.654239383, 1.146423008, 1.621286967],
            [1.654239383, 0, 2.068662631, 3.035242727],
            [1.146423008, 2.068662631, 0, 1.956673562],
            [1.621286967, 3.035242727, 1.956673562, 0],
        ]
        assert_almost_equal(model(data), complete_expected)
        assert_almost_equal(
            distance.Euclidean(data, axis=1, normalize=True),
            complete_expected)

        # Table arrays are modified inside `unlocked()`, as the other tests
        # in this file do; a bare assignment fails when the array is locked.
        with data.unlocked():
            data.X[1, 0] = np.nan
        model = distance.Euclidean(axis=1, normalize=True).fit(data)
        assert_almost_equal(model.means, [3, 2.75, 1.5])
        assert_almost_equal(model.vars, [8, 2.1875, 1.25])
        dist = model(data)
        assert_almost_equal(
            dist,
            [
                [0, 1.806733438, 1.146423008, 1.696635326],
                [1.806733438, 0, 2.192519751, 2.675283697],
                [1.146423008, 2.192519751, 0, 2.019547333],
                [1.696635326, 2.675283697, 2.019547333, 0],
            ],
        )

        with data.unlocked():
            data.X[0, 0] = np.nan
        model = distance.Euclidean(axis=1, normalize=True).fit(data)
        assert_almost_equal(model.means, [4, 2.75, 1.5])
        assert_almost_equal(model.vars, [9, 2.1875, 1.25])
        dist = model(data)
        assert_almost_equal(
            dist,
            [
                [0, 1.874642823, 1.521277659, 1.276154939],
                [1.874642823, 0, 2.248809209, 2.580143961],
                [1.521277659, 2.248809209, 0, 1.956673562],
                [1.276154939, 2.580143961, 1.956673562, 0],
            ],
        )
Пример #6
0
    def test_euclidean_disc(self):
        # Euclidean distance on discrete fixtures.  For each state of the
        # table the fitted model's dist_missing_disc / dist_missing2_disc
        # statistics and the resulting distance matrix are checked; cells in
        # column 0 are progressively replaced by NaN between checks.
        check = np.testing.assert_almost_equal
        table = self.disc_data

        fitted = distance.Euclidean().fit(table)
        check(fitted.dist_missing_disc,
              [[1 / 3, 2 / 3, 1, 1],
               [2 / 3, 2 / 3, 1, 2 / 3],
               [2 / 3, 1 / 3, 1, 1]])
        check(fitted.dist_missing2_disc, [1 - 5 / 9, 1 - 3 / 9, 1 - 5 / 9])

        # Expected matrices below are written as squared distances.
        squared = np.array([[0, 2, 3], [2, 0, 2], [3, 2, 0]])
        check(fitted(table), np.sqrt(squared))

        with table.unlocked():
            table.X[1, 0] = np.nan
        fitted = distance.Euclidean().fit(table)
        check(fitted.dist_missing_disc,
              [[1 / 2, 1 / 2, 1, 1],
               [2 / 3, 2 / 3, 1, 2 / 3],
               [2 / 3, 1 / 3, 1, 1]])
        check(fitted.dist_missing2_disc, [1 - 2 / 4, 1 - 3 / 9, 1 - 5 / 9])

        # The model is applied while the table is unlocked here.
        with table.unlocked():
            computed = fitted(table)
        squared = np.array([[0, 2.5, 3], [2.5, 0, 1.5], [3, 1.5, 0]])
        check(computed, np.sqrt(squared))

        with table.unlocked():
            table.X[0, 0] = np.nan
        fitted = distance.Euclidean().fit(table)
        check(fitted.dist_missing_disc,
              [[1, 0, 1, 1],
               [2 / 3, 2 / 3, 1, 2 / 3],
               [2 / 3, 1 / 3, 1, 1]])
        check(fitted.dist_missing2_disc, [1 - 1, 1 - 3 / 9, 1 - 5 / 9])

        squared = np.array([[0, 2, 2], [2, 0, 1], [2, 1, 0]])
        check(fitted(table), np.sqrt(squared))

        # Second fixture: the first two rows of column 0 are blanked at once.
        table = self.disc_data4
        with table.unlocked():
            table.X[:2, 0] = np.nan
        fitted = distance.Euclidean().fit(table)

        check(fitted.dist_missing_disc,
              [[1 / 2, 1 / 2, 1, 1],
               [3 / 4, 2 / 4, 1, 3 / 4],
               [3 / 4, 1 / 4, 1, 1]])
        check(fitted.dist_missing2_disc,
              [1 - 2 / 4, 1 - 6 / 16, 1 - 10 / 16])

        squared = np.array([[0, 2.5, 2.5, 2.5],
                            [2.5, 0, 0.5, 1.5],
                            [2.5, 0.5, 0, 2],
                            [2.5, 1.5, 2, 0]])
        check(fitted(table), np.sqrt(squared))
    def test_euclidean_mixed(self):
        # Normalized Euclidean distance (axis=1) on the mixed fixture: the
        # fitted model must expose both the continuous statistics (means,
        # vars, dist_missing2_cont) and the discrete ones (dist_missing_disc,
        # dist_missing2_disc), and produce the expected distance matrix.
        check = np.testing.assert_almost_equal
        table = self.mixed_data

        fitted = distance.Euclidean(axis=1, normalize=True).fit(table)

        check(fitted.means, [1 / 3, 3, 1])
        check(fitted.vars, [8 / 9, 8 / 3, 2 / 3])
        check(fitted.dist_missing_disc,
              [[1 / 3, 2 / 3, 1, 1],
               [2 / 3, 2 / 3, 1, 2 / 3],
               [2 / 3, 1 / 3, 1, 1]])
        check(fitted.dist_missing2_cont, [1, 1, 1])
        check(fitted.dist_missing2_disc, [1 - 5 / 9, 1 - 3 / 9, 1 - 5 / 9])

        expected = [
            [0, 2.828427125, 2.121320344],
            [2.828427125, 0, 2.828427125],
            [2.121320344, 2.828427125, 0],
        ]
        check(fitted(table), expected)
Пример #8
0
def build_linkage_matrix(topic_table):
    """Return a SciPy linkage matrix for the rows of *topic_table*.

    The input is wrapped in ``data.Table``, pairwise Euclidean distances
    are computed from its ``X`` array, and the rows are clustered with the
    ``hierarchical.AVERAGE`` linkage method.

    The feature matrix, the distance matrix and the resulting linkage are
    printed as diagnostic output.

    :param topic_table: anything accepted by ``data.Table(...)``.
    :return: the array returned by ``scipy.cluster.hierarchy.linkage``.
    """
    # Imported locally so the fix does not depend on the file's import block.
    from scipy.spatial.distance import squareform

    x = data.Table(topic_table)
    print(x.X)
    dist_matrix = distance.Euclidean(x.X)
    print(dist_matrix)

    # `linkage` interprets a 2-D argument as raw observation vectors, not as
    # a distance matrix.  Pass the condensed (upper-triangle) form so the
    # clustering uses the distances computed above.  `checks=False` skips
    # the exact symmetry / zero-diagonal validation, which floating-point
    # round-off could otherwise trip.
    condensed = squareform(dist_matrix, checks=False)
    linkage_matrix = scipy.cluster.hierarchy.linkage(
        condensed, method=hierarchical.AVERAGE)
    print(linkage_matrix)
    return linkage_matrix
Пример #9
0
def test_main():
    """Show the hierarchical clustering widget on the iris data set.

    Runs a Qt event loop until the application exits, then tears the
    widget down.

    :return: the value returned by ``QApplication.exec_()``.
    """
    from PyQt4.QtGui import QApplication
    import sip
    import Orange.distance as distance

    application = QApplication([])
    clustering_widget = OWHierarchicalClustering()

    iris = Orange.data.Table("iris.tab")
    clustering_widget.set_distances(distance.Euclidean(iris))
    clustering_widget.handleNewSignals()

    clustering_widget.show()
    clustering_widget.raise_()
    exit_code = application.exec_()

    # Tear down explicitly: notify the widget, delete it through sip, drop
    # the Python reference, then let Qt process the pending events.
    clustering_widget.onDeleteWidget()
    sip.delete(clustering_widget)
    del clustering_widget
    application.processEvents()
    return exit_code
Пример #10
0
# Remove result files left over from a previous run.
f1 = 'Hierarchical.csv'
if os.path.exists(f1):
    os.remove(f1)

f2 = 'KMeans.csv'
if os.path.exists(f2):
    os.remove(f2)

data = Orange.data.Table('output.tab')
numDocs = len(data)
# Single-argument parenthesised print: identical output under Python 2's
# print statement, and valid syntax under Python 3 as well.
print("Count of documents in Reuters dataset: " + str(numDocs) + "\n")
print("1. Constructing Distance Matrices\n")

# Time the Euclidean pipeline: distance matrix, then average-linkage
# hierarchical clustering on it.
starter = time.time()
constructorEuclidean = distance.Euclidean()
EuclideanDistanceMat = distance.distance_matrix(
    data, distance_constructor=constructorEuclidean)
euclidean_hierarchical_clustering = (
    clustering.hierarchical.HierarchicalClustering())
euclidean_hierarchical_clustering.linkage = clustering.hierarchical.AVERAGE
euclideanRoot = euclidean_hierarchical_clustering(EuclideanDistanceMat)
ender = time.time()
timer = ender - starter

# Same measurement for the Manhattan distance.
starter1 = time.time()
constructorManhattan = distance.Manhattan()
ManhattanDistanceMat = distance.distance_matrix(
    data, distance_constructor=constructorManhattan)
manhattan_hierarchical_clustering = (
    clustering.hierarchical.HierarchicalClustering())
Пример #11
0
    def paint(self, painter, *args):
        """Draw ``self._line`` on *painter* using this item's pen.

        The painter state is saved before the pen is changed and restored
        afterwards.  Extra positional arguments are accepted and ignored.
        """
        line_missing = self._line is None
        if line_missing:
            # Return value is discarded; presumably boundingRect() fills in
            # self._line as a side effect -- confirm in its implementation.
            self.boundingRect()

        painter.save()
        item_pen = self.pen()
        painter.setPen(item_pen)
        painter.drawLine(self._line)
        painter.restore()


def clusters_at_height(root, height):
    """Return the clusters obtained by cutting the tree under `root` at `height`.

    Nodes are visited in the order produced by ``preorder(root)``.  A node
    is selected when its ``value.height`` is below `height` and it has not
    already been covered by a previously selected node; selecting a node
    covers everything yielded by ``preorder(node)``.
    """
    covered = set()
    selected = []
    for node in preorder(root):
        if node not in covered and node.value.height < height:
            selected.append(node)
            # Mark the whole subtree so its members are not selected again.
            covered.update(preorder(node))
    return selected


if __name__ == "__main__":  # pragma: no cover
    # Manual preview: run the widget on Euclidean distances computed from
    # the preprocessed iris data set.
    from Orange import distance
    iris = Orange.data.Table("iris")
    preprocessed = distance._preprocess(iris)
    distances = distance.Euclidean(preprocessed)
    WidgetPreview(OWHierarchicalClustering).run(distances)
Пример #12
0
import Orange.data
import Orange.misc
from Orange.widgets import widget, gui, settings
from Orange import distance

# (display name, distance instance) pairs, in the order listed here.
# Each distance class is instantiated once, with no arguments.
_METRICS = [
    (label, metric_cls())
    for label, metric_cls in (
        ("Euclidean", distance.Euclidean),
        ("Manhattan", distance.Manhattan),
        ("Cosine", distance.Cosine),
        ("Jaccard", distance.Jaccard),
        ("Mahalanobis", distance.Mahalanobis),
        ("Spearman", distance.SpearmanR),
        ("Spearman absolute", distance.SpearmanRAbsolute),
        ("Pearson", distance.PearsonR),
        ("Pearson absolute", distance.PearsonRAbsolute),
    )
]


class OWDistances(widget.OWWidget):
    name = "Distances"
    description = "Compute a matrix of pairwise distances."
    icon = "icons/Distance.svg"

    inputs = [("Data", Orange.data.Table, "set_data")]
    outputs = [("Distances", Orange.misc.DistMatrix)]

    axis = settings.Setting(0)
    metric_idx = settings.Setting(0)
    autocommit = settings.Setting(False)

    want_main_area = False