示例#1
0
def bench_float(m1=200, m2=200, rseed=0):
    """Benchmark pyDistances against scipy on real-valued metrics.

    For each metric in ``METRIC_DICT`` and each combination of its
    parameter values, ``DistanceMetric.cdist``/``pdist`` and the
    ``cdist``/``pdist`` functions are timed on random data.  Timings are
    printed, and a FAIL line is printed when the two results disagree.

    Parameters
    ----------
    m1 : int
        Number of rows of the random array ``X1`` (used by all four calls).
    m2 : int
        Number of rows of the random array ``X2`` (second cdist argument).
    rseed : int
        Seed handed to ``np.random.seed``.
    """
    # Every print receives exactly one pre-formatted string, so the output
    # is the same whether ``print`` is the statement or the function.
    print(79 * '_')
    print(" real valued distance metrics")
    print("")
    np.random.seed(rseed)
    X1 = np.random.random((m1, DTEST))
    X2 = np.random.random((m2, DTEST))
    for (metric, argdict) in METRIC_DICT.items():
        keys = argdict.keys()
        for vals in itertools.product(*argdict.values()):
            kwargs = dict(zip(keys, vals))
            print("%s %s" % (metric, param_info(kwargs)))

            # Reset the results for this combination: a call that raises
            # must not leave a name undefined (NameError below) or holding
            # the previous combination's array (misleading comparison).
            Yc1 = Yc2 = Yp1 = Yp2 = None

            t0 = time()
            try:
                dist_metric = DistanceMetric(metric, **kwargs)
                Yc1 = dist_metric.cdist(X1, X2)
            except Exception as inst:
                print(" >>>>>>>>>> error in pyDistances cdist:")
                print("%s %s" % ("           ", inst))
            t1 = time()
            try:
                Yc2 = cdist(X1, X2, metric, **kwargs)
            except Exception as inst:
                print(" >>>>>>>>>> error in scipy cdist:")
                print("%s %s" % ("           ", inst))
            t2 = time()
            try:
                dist_metric = DistanceMetric(metric, **kwargs)
                Yp1 = dist_metric.pdist(X1)
            except Exception as inst:
                print(" >>>>>>>>>> error in pyDistances pdist:")
                print("%s %s" % ("           ", inst))
            t3 = time()
            try:
                Yp2 = pdist(X1, metric, **kwargs)
            except Exception as inst:
                print(" >>>>>>>>>> error in scipy pdist:")
                print("%s %s" % ("           ", inst))
            t4 = time()

            # Compare only when both sides produced a result; a failure
            # has already been reported by the matching except branch.
            if Yc1 is not None and Yc2 is not None:
                if not np.allclose(Yc1, Yc2):
                    print(" >>>>>>>>>> FAIL: cdist results don't match")
            if Yp1 is not None and Yp2 is not None:
                if not np.allclose(Yp1, Yp2):
                    print(" >>>>>>>>>> FAIL: pdist results don't match")
            print(" - pyDistances:  c: %.4f sec     p: %.4f sec"
                  % (t1 - t0, t3 - t2))
            print(" - scipy:        c: %.4f sec     p: %.4f sec"
                  % (t2 - t1, t4 - t3))

    print('')
示例#2
0
def bench_float(m1=200, m2=200, rseed=0):
    """Benchmark pyDistances against scipy on real-valued metrics.

    For each metric in ``METRIC_DICT`` and each combination of its
    parameter values, ``DistanceMetric.cdist``/``pdist`` and the
    ``cdist``/``pdist`` functions are timed on random data.  Timings are
    printed, and a FAIL line is printed when the two results disagree.

    Parameters
    ----------
    m1 : int
        Number of rows of the random array ``X1`` (used by all four calls).
    m2 : int
        Number of rows of the random array ``X2`` (second cdist argument).
    rseed : int
        Seed handed to ``np.random.seed``.
    """
    # Every print receives exactly one pre-formatted string, so the output
    # is the same whether ``print`` is the statement or the function.
    print(79 * '_')
    print(" real valued distance metrics")
    print("")
    np.random.seed(rseed)
    X1 = np.random.random((m1, DTEST))
    X2 = np.random.random((m2, DTEST))
    for (metric, argdict) in METRIC_DICT.items():
        keys = argdict.keys()
        for vals in itertools.product(*argdict.values()):
            kwargs = dict(zip(keys, vals))
            print("%s %s" % (metric, param_info(kwargs)))

            # Reset the results for this combination: a call that raises
            # must not leave a name undefined (NameError below) or holding
            # the previous combination's array (misleading comparison).
            Yc1 = Yc2 = Yp1 = Yp2 = None

            t0 = time()
            try:
                dist_metric = DistanceMetric(metric, **kwargs)
                Yc1 = dist_metric.cdist(X1, X2)
            except Exception as inst:
                print(" >>>>>>>>>> error in pyDistances cdist:")
                print("%s %s" % ("           ", inst))
            t1 = time()
            try:
                Yc2 = cdist(X1, X2, metric, **kwargs)
            except Exception as inst:
                print(" >>>>>>>>>> error in scipy cdist:")
                print("%s %s" % ("           ", inst))
            t2 = time()
            try:
                dist_metric = DistanceMetric(metric, **kwargs)
                Yp1 = dist_metric.pdist(X1)
            except Exception as inst:
                print(" >>>>>>>>>> error in pyDistances pdist:")
                print("%s %s" % ("           ", inst))
            t3 = time()
            try:
                Yp2 = pdist(X1, metric, **kwargs)
            except Exception as inst:
                print(" >>>>>>>>>> error in scipy pdist:")
                print("%s %s" % ("           ", inst))
            t4 = time()

            # Compare only when both sides produced a result; a failure
            # has already been reported by the matching except branch.
            if Yc1 is not None and Yc2 is not None:
                if not np.allclose(Yc1, Yc2):
                    print(" >>>>>>>>>> FAIL: cdist results don't match")
            if Yp1 is not None and Yp2 is not None:
                if not np.allclose(Yp1, Yp2):
                    print(" >>>>>>>>>> FAIL: pdist results don't match")
            print(" - pyDistances:  c: %.4f sec     p: %.4f sec"
                  % (t1 - t0, t3 - t2))
            print(" - scipy:        c: %.4f sec     p: %.4f sec"
                  % (t2 - t1, t4 - t3))

    print('')
示例#3
0
    def _check_metrics_bool(self, k, metric, kwargs):
        """Compare BallTree k-neighbor distances with a brute-force result.

        A ``BallTree`` built on ``self.Xbool`` is queried for the ``k``
        nearest neighbors of ``self.Ybool``; the returned distances must
        agree with the ``k`` smallest entries per row of the full
        ``DistanceMetric.cdist`` matrix.

        ``kwargs`` is a dict of extra metric parameters that is forwarded
        to both constructors.
        """
        tree = BallTree(self.Xbool, metric=metric, **kwargs)
        tree_dist, _ = tree.query(self.Ybool, k=k)

        brute = DistanceMetric(metric=metric, **kwargs)
        full = brute.cdist(self.Ybool, self.Xbool)

        # for every query row, the columns holding its k smallest distances
        nearest = np.argsort(full, axis=1)[:, :k]
        n_queries = self.Ybool.shape[0]
        row_ids = np.arange(n_queries).reshape(-1, 1)
        brute_dist = full[row_ids, nearest]

        # Only the distances are compared: nearest neighbors are very often
        # tied, so the index arrays may legitimately differ while the
        # distances are correct either way.
        assert_array_almost_equal(tree_dist, brute_dist)
示例#4
0
    def _check_metrics_bool(self, k, metric, kwargs):
        """Compare BallTree k-neighbor distances with a brute-force result.

        A ``BallTree`` built on ``self.Xbool`` is queried for the ``k``
        nearest neighbors of ``self.Ybool``; the returned distances must
        agree with the ``k`` smallest entries per row of the full
        ``DistanceMetric.cdist`` matrix.

        ``kwargs`` is a dict of extra metric parameters that is forwarded
        to both constructors.
        """
        tree = BallTree(self.Xbool, metric=metric, **kwargs)
        tree_dist, _ = tree.query(self.Ybool, k=k)

        brute = DistanceMetric(metric=metric, **kwargs)
        full = brute.cdist(self.Ybool, self.Xbool)

        # for every query row, the columns holding its k smallest distances
        nearest = np.argsort(full, axis=1)[:, :k]
        n_queries = self.Ybool.shape[0]
        row_ids = np.arange(n_queries).reshape(-1, 1)
        brute_dist = full[row_ids, nearest]

        # Only the distances are compared: nearest neighbors are very often
        # tied, so the index arrays may legitimately differ while the
        # distances are correct either way.
        assert_array_almost_equal(tree_dist, brute_dist)
示例#5
0
    def test_query_radius_indices(self, n_queries=20):
        """Check ``BallTree.query_radius`` indices against brute force.

        The first ``n_queries`` rows of the rescaled data are used as query
        points, and the radius is the mean of the brute-force distance
        matrix between those queries and all points.  The indices returned
        by the tree (sorted per query, then concatenated) must equal the
        column indices at which the brute-force distance is ``<= r``.

        Parameters
        ----------
        n_queries : int
            Number of leading rows of the data to use as query points.
        """
        # center the data
        X = 2 * self.X - 1

        # default-constructed metric; NOTE(review): presumably Euclidean,
        # matching the BallTree default -- confirm against DistanceMetric
        dm = DistanceMetric()
        D = dm.cdist(X[:n_queries], X)
        r = np.mean(D)

        bt = BallTree(X)
        ind = bt.query_radius(X[:n_queries], r, return_distance=False)
        # every row of ind2 is [0, 1, ..., n_points - 1] (as floats)
        ind2 = np.zeros(D.shape) + np.arange(D.shape[1])

        # Build a real list rather than using map(): under Python 3 map()
        # returns an iterator, which np.concatenate does not accept.
        ind = np.concatenate([np.sort(neighbors) for neighbors in ind])
        # the boolean mask selects row by row, matching the per-query order
        # of the concatenation above
        ind2 = ind2[D <= r]

        assert_array_almost_equal(ind, ind2)
示例#6
0
    def test_query_radius_indices(self, n_queries=20):
        """Check ``BallTree.query_radius`` indices against brute force.

        The first ``n_queries`` rows of the rescaled data are used as query
        points, and the radius is the mean of the brute-force distance
        matrix between those queries and all points.  The indices returned
        by the tree (sorted per query, then concatenated) must equal the
        column indices at which the brute-force distance is ``<= r``.

        Parameters
        ----------
        n_queries : int
            Number of leading rows of the data to use as query points.
        """
        # center the data
        X = 2 * self.X - 1

        # default-constructed metric; NOTE(review): presumably Euclidean,
        # matching the BallTree default -- confirm against DistanceMetric
        dm = DistanceMetric()
        D = dm.cdist(X[:n_queries], X)
        r = np.mean(D)

        bt = BallTree(X)
        ind = bt.query_radius(X[:n_queries], r, return_distance=False)
        # every row of ind2 is [0, 1, ..., n_points - 1] (as floats)
        ind2 = np.zeros(D.shape) + np.arange(D.shape[1])

        # Build a real list rather than using map(): under Python 3 map()
        # returns an iterator, which np.concatenate does not accept.
        ind = np.concatenate([np.sort(neighbors) for neighbors in ind])
        # the boolean mask selects row by row, matching the per-query order
        # of the concatenation above
        ind2 = ind2[D <= r]

        assert_array_almost_equal(ind, ind2)
示例#7
0
    def test_query_radius_distance(self):
        """Yield one radius-query distance check per test radius.

        Test generator: for ten radii evenly spaced between the smallest
        and largest brute-force distance from a single query point, yields
        ``self._check_query_radius_distance`` together with its arguments
        ``(X, bt, query_pt, dist_true, r, eps)``.
        """
        # rescale the data as 2 * X - 1 to center it
        data = 2 * self.X - 1

        # a single query point, placed near the origin
        query = 0.01 * data[:1]

        tol = 1E-15  # roundoff error can cause test to fail
        tree = BallTree(data, leaf_size=5)

        # sorted brute-force distances from the query to every point
        metric = DistanceMetric()
        reference = metric.cdist(query, data)[0]
        reference.sort()

        check = self._check_query_radius_distance
        nearest, farthest = reference[0], reference[-1]
        for radius in np.linspace(nearest, farthest, 10):
            yield (check, data, tree, query, reference, radius, tol)
示例#8
0
    def test_query_radius_distance(self):
        """Yield one radius-query distance check per test radius.

        Test generator: for ten radii evenly spaced between the smallest
        and largest brute-force distance from a single query point, yields
        ``self._check_query_radius_distance`` together with its arguments
        ``(X, bt, query_pt, dist_true, r, eps)``.
        """
        # rescale the data as 2 * X - 1 to center it
        data = 2 * self.X - 1

        # a single query point, placed near the origin
        query = 0.01 * data[:1]

        tol = 1E-15  # roundoff error can cause test to fail
        tree = BallTree(data, leaf_size=5)

        # sorted brute-force distances from the query to every point
        metric = DistanceMetric()
        reference = metric.cdist(query, data)[0]
        reference.sort()

        check = self._check_query_radius_distance
        nearest, farthest = reference[0], reference[-1]
        for radius in np.linspace(nearest, farthest, 10):
            yield (check, data, tree, query, reference, radius, tol)