def bench_float(m1=200, m2=200, rseed=0):
    """Benchmark pyDistances against scipy for real-valued metrics.

    Two random matrices of shape ``(m1, DTEST)`` and ``(m2, DTEST)`` are
    generated, and for every metric in ``METRIC_DICT`` and every combination
    of its parameter values the ``cdist`` and ``pdist`` results of
    ``DistanceMetric`` are timed and compared against the module-level
    ``cdist`` / ``pdist`` functions (reported as "scipy" in the output).

    Parameters
    ----------
    m1 : int
        Number of rows of the first random matrix ``X1``.
    m2 : int
        Number of rows of the second random matrix ``X2``.
    rseed : int
        Seed passed to ``np.random.seed`` so runs are reproducible.

    Notes
    -----
    Errors raised by either implementation are printed rather than
    propagated.  When an implementation fails, the corresponding comparison
    is skipped instead of raising ``NameError`` or silently comparing
    against results left over from a previous metric.
    """
    print(79 * '_')
    print(" real valued distance metrics")
    print('')
    np.random.seed(rseed)
    X1 = np.random.random((m1, DTEST))
    X2 = np.random.random((m2, DTEST))

    for metric, argdict in METRIC_DICT.items():
        keys = list(argdict.keys())
        # One benchmark run per combination of the metric's parameter values.
        for vals in itertools.product(*argdict.values()):
            kwargs = dict(zip(keys, vals))
            print("%s %s" % (metric, param_info(kwargs)))

            # Reset every iteration: a failed call must not leave a stale
            # (or unbound) result behind for the comparison below.
            Yc1 = Yc2 = Yp1 = Yp2 = None

            t0 = time()
            try:
                dist_metric = DistanceMetric(metric, **kwargs)
                Yc1 = dist_metric.cdist(X1, X2)
            except Exception as inst:
                print(" >>>>>>>>>> error in pyDistances cdist:")
                print("  %s" % (inst,))

            t1 = time()
            try:
                Yc2 = cdist(X1, X2, metric, **kwargs)
            except Exception as inst:
                print(" >>>>>>>>>> error in scipy cdist:")
                print("  %s" % (inst,))

            t2 = time()
            try:
                dist_metric = DistanceMetric(metric, **kwargs)
                Yp1 = dist_metric.pdist(X1)
            except Exception as inst:
                print(" >>>>>>>>>> error in pyDistances pdist:")
                print("  %s" % (inst,))

            t3 = time()
            try:
                Yp2 = pdist(X1, metric, **kwargs)
            except Exception as inst:
                print(" >>>>>>>>>> error in scipy pdist:")
                print("  %s" % (inst,))

            t4 = time()

            # Only compare when both implementations produced a result.
            if Yc1 is not None and Yc2 is not None:
                if not np.allclose(Yc1, Yc2):
                    print(" >>>>>>>>>> FAIL: cdist results don't match")
            if Yp1 is not None and Yp2 is not None:
                if not np.allclose(Yp1, Yp2):
                    print(" >>>>>>>>>> FAIL: pdist results don't match")

            print(" - pyDistances: c: %.4f sec p: %.4f sec"
                  % (t1 - t0, t3 - t2))
            print(" - scipy: c: %.4f sec p: %.4f sec"
                  % (t2 - t1, t4 - t3))
    print('')
def bench_float(m1=200, m2=200, rseed=0):
    """Benchmark pyDistances against scipy for real-valued metrics.

    Two random matrices of shape ``(m1, DTEST)`` and ``(m2, DTEST)`` are
    generated, and for every metric in ``METRIC_DICT`` and every combination
    of its parameter values the ``cdist`` and ``pdist`` results of
    ``DistanceMetric`` are timed and compared against the module-level
    ``cdist`` / ``pdist`` functions (reported as "scipy" in the output).

    Parameters
    ----------
    m1 : int
        Number of rows of the first random matrix ``X1``.
    m2 : int
        Number of rows of the second random matrix ``X2``.
    rseed : int
        Seed passed to ``np.random.seed`` so runs are reproducible.

    Notes
    -----
    Errors raised by either implementation are printed rather than
    propagated.  When an implementation fails, the corresponding comparison
    is skipped instead of raising ``NameError`` or silently comparing
    against results left over from a previous metric.
    """
    print(79 * '_')
    print(" real valued distance metrics")
    print('')
    np.random.seed(rseed)
    X1 = np.random.random((m1, DTEST))
    X2 = np.random.random((m2, DTEST))

    for metric, argdict in METRIC_DICT.items():
        keys = list(argdict.keys())
        # One benchmark run per combination of the metric's parameter values.
        for vals in itertools.product(*argdict.values()):
            kwargs = dict(zip(keys, vals))
            print("%s %s" % (metric, param_info(kwargs)))

            # Reset every iteration: a failed call must not leave a stale
            # (or unbound) result behind for the comparison below.
            Yc1 = Yc2 = Yp1 = Yp2 = None

            t0 = time()
            try:
                dist_metric = DistanceMetric(metric, **kwargs)
                Yc1 = dist_metric.cdist(X1, X2)
            except Exception as inst:
                print(" >>>>>>>>>> error in pyDistances cdist:")
                print("  %s" % (inst,))

            t1 = time()
            try:
                Yc2 = cdist(X1, X2, metric, **kwargs)
            except Exception as inst:
                print(" >>>>>>>>>> error in scipy cdist:")
                print("  %s" % (inst,))

            t2 = time()
            try:
                dist_metric = DistanceMetric(metric, **kwargs)
                Yp1 = dist_metric.pdist(X1)
            except Exception as inst:
                print(" >>>>>>>>>> error in pyDistances pdist:")
                print("  %s" % (inst,))

            t3 = time()
            try:
                Yp2 = pdist(X1, metric, **kwargs)
            except Exception as inst:
                print(" >>>>>>>>>> error in scipy pdist:")
                print("  %s" % (inst,))

            t4 = time()

            # Only compare when both implementations produced a result.
            if Yc1 is not None and Yc2 is not None:
                if not np.allclose(Yc1, Yc2):
                    print(" >>>>>>>>>> FAIL: cdist results don't match")
            if Yp1 is not None and Yp2 is not None:
                if not np.allclose(Yp1, Yp2):
                    print(" >>>>>>>>>> FAIL: pdist results don't match")

            print(" - pyDistances: c: %.4f sec p: %.4f sec"
                  % (t1 - t0, t3 - t2))
            print(" - scipy: c: %.4f sec p: %.4f sec"
                  % (t2 - t1, t4 - t3))
    print('')
def _check_metrics_float(self, k, metric, kwargs):
    """Check BallTree k-neighbor distances against a brute-force search.

    A ``BallTree`` built on ``self.X`` with the given ``metric`` and
    ``kwargs`` is queried for the ``k`` nearest neighbors of every row of
    ``self.X``.  The returned distances must agree with the ``k`` smallest
    entries of each row of the full pairwise distance matrix computed by
    ``DistanceMetric``.
    """
    tree = BallTree(self.X, metric=metric, **kwargs)
    dist_bt, _ = tree.query(self.X, k=k)

    brute = DistanceMetric(metric=metric, **kwargs)
    pairwise = brute.pdist(self.X, squareform=True)

    # Column indices of the k smallest distances in every row.
    nearest = np.argsort(pairwise, 1)[:, :k]
    row_ids = np.arange(self.X.shape[0])[:, None]
    dist_dm = pairwise[row_ids, nearest]

    # Only distances are compared: with a tie for nearest neighbor the
    # indices may legitimately differ, while the distances still show
    # whether the search was successful.
    assert_array_almost_equal(dist_bt, dist_dm)
def _check_metrics_float(self, k, metric, kwargs):
    """Check BallTree k-neighbor distances against a brute-force search.

    A ``BallTree`` built on ``self.X`` with the given ``metric`` and
    ``kwargs`` is queried for the ``k`` nearest neighbors of every row of
    ``self.X``.  The returned distances must agree with the ``k`` smallest
    entries of each row of the full pairwise distance matrix computed by
    ``DistanceMetric``.
    """
    tree = BallTree(self.X, metric=metric, **kwargs)
    dist_bt, _ = tree.query(self.X, k=k)

    brute = DistanceMetric(metric=metric, **kwargs)
    pairwise = brute.pdist(self.X, squareform=True)

    # Column indices of the k smallest distances in every row.
    nearest = np.argsort(pairwise, 1)[:, :k]
    row_ids = np.arange(self.X.shape[0])[:, None]
    dist_dm = pairwise[row_ids, nearest]

    # Only distances are compared: with a tie for nearest neighbor the
    # indices may legitimately differ, while the distances still show
    # whether the search was successful.
    assert_array_almost_equal(dist_bt, dist_dm)
def test_query_radius_count(self):
    """Check ``BallTree.query_radius(count_only=True)`` against brute force.

    The radius is the mean of all pairwise distances of the rescaled data.
    For every point, the count reported by the tree must equal the number
    of entries in that point's row of the pairwise distance matrix that are
    less than or equal to the radius.
    """
    # center the data
    centered = 2 * self.X - 1

    metric = DistanceMetric()
    pairwise = metric.pdist(centered, squareform=True)
    radius = np.mean(pairwise)

    tree = BallTree(centered)
    tree_counts = tree.query_radius(centered, radius, count_only=True)
    brute_counts = np.sum(pairwise <= radius, axis=1)

    assert_array_almost_equal(tree_counts, brute_counts)
def test_query_radius_count(self):
    """Check ``BallTree.query_radius(count_only=True)`` against brute force.

    The radius is the mean of all pairwise distances of the rescaled data.
    For every point, the count reported by the tree must equal the number
    of entries in that point's row of the pairwise distance matrix that are
    less than or equal to the radius.
    """
    # center the data
    centered = 2 * self.X - 1

    metric = DistanceMetric()
    pairwise = metric.pdist(centered, squareform=True)
    radius = np.mean(pairwise)

    tree = BallTree(centered)
    tree_counts = tree.query_radius(centered, radius, count_only=True)
    brute_counts = np.sum(pairwise <= radius, axis=1)

    assert_array_almost_equal(tree_counts, brute_counts)