示例#1
0
    def setUp(self):
        self.V = []
        self.V.append((numpy.array([0]), 'data1', 0.4))
        self.V.append((numpy.array([1]), 'data2', 0.9))
        self.V.append((numpy.array([2]), 'data3', 1.4))
        self.V.append((numpy.array([3]), 'data4', 2.1))
        self.V.append((numpy.array([4]), 'data5', 0.1))
        self.V.append((numpy.array([5]), 'data6', 8.7))
        self.V.append((numpy.array([6]), 'data7', 3.4))
        self.V.append((numpy.array([7]), 'data8', 2.8))

        self.threshold_filter = DistanceThresholdFilter(1.0)
        self.nearest_filter = NearestFilter(5)
        self.unique = UniqueFilter()
示例#2
0
    def __init__(self,
                 dim,
                 lshashes=None,
                 distance=None,
                 fetch_vector_filters=None,
                 vector_filters=None,
                 storage=None):
        """ Keeps the configuration. """
        if lshashes is None:
            lshashes = [RandomBinaryProjections('default', 10)]
        self.lshashes = lshashes
        if distance is None: distance = EuclideanDistance()
        self.distance = distance
        if vector_filters is None: vector_filters = [NearestFilter(10)]
        self.vector_filters = vector_filters
        if fetch_vector_filters is None:
            fetch_vector_filters = [UniqueFilter()]
        self.fetch_vector_filters = fetch_vector_filters
        if storage is None: storage = MemoryStorage()
        self.storage = storage

        # Initialize all hashes for the data space dimension.
        for lshash in self.lshashes:
            lshash.reset(dim)

        print('*** engine init done ***')
示例#3
0
def main(args):
    """ Main entry.
    """

    data = Dataset(args.dataset)
    num, dim = data.base.shape

    # We are looking for the ten closest neighbours
    nearest = NearestFilter(args.topk)
    # We want unique candidates
    unique = UniqueFilter()

    # Create engines for all configurations
    for nbit, ntbl in itertools.product(args.nbits, args.ntbls):
        logging.info("Creating Engine ...")
        lshashes = [RandomBinaryProjections('rbp%d' % i, nbit)
                    for i in xrange(ntbl)]

        # Create engine with this configuration
        engine = Engine(dim, lshashes=lshashes,
                        vector_filters=[unique, nearest])
        logging.info("\tDone!")

        logging.info("Adding items ...")
        for i in xrange(num):
            engine.store_vector(data.base[i, :], i)
            if i % 100000 == 0:
                logging.info("\t%d/%d" % (i, data.nbae))
        logging.info("\tDone!")

        ids = np.zeros((data.nqry, args.topk), np.int)
        logging.info("Searching ...")
        tic()
        for i in xrange(data.nqry):
            reti = [y for x, y, z in
                    np.array(engine.neighbours(data.query[i]))]
            ids[i, :len(reti)] = reti
            if i % 100 == 0:
                logging.info("\t%d/%d" % (i, data.nqry))
        time_costs = toc()
        logging.info("\tDone!")

        report = os.path.join(args.exp_dir, "report.txt")
        with open(report, "a") as rptf:
            rptf.write("*" * 64 + "\n")
            rptf.write("* %s\n" % time.asctime())
            rptf.write("*" * 64 + "\n")

        r_at_k = compute_stats(data.groundtruth, ids, args.topk)[-1][-1]

        with open(report, "a") as rptf:
            rptf.write("=" * 64 + "\n")
            rptf.write("index_%s-nbit_%d-ntbl_%d\n" % ("NearPy", nbit, ntbl))
            rptf.write("-" * 64 + "\n")
            rptf.write("recall@%-8d%.4f\n" % (args.topk, r_at_k))
            rptf.write("time cost (ms): %.3f\n" %
                       (time_costs * 1000 / data.nqry))
示例#4
0
    def setUp(self):
        self.V = []
        self.V.append((numpy.array([0]), 'data1', 0.4))
        self.V.append((numpy.array([1]), 'data2', 0.9))
        self.V.append((numpy.array([2]), 'data3', 1.4))
        self.V.append((numpy.array([3]), 'data4', 2.1))
        self.V.append((numpy.array([4]), 'data5', 0.1))
        self.V.append((numpy.array([5]), 'data6', 8.7))
        self.V.append((numpy.array([6]), 'data7', 3.4))
        self.V.append((numpy.array([7]), 'data8', 2.8))

        self.threshold_filter = DistanceThresholdFilter(1.0)
        self.nearest_filter = NearestFilter(5)
        self.unique = UniqueFilter()
示例#5
0
class TestVectorFilters(unittest.TestCase):

    def setUp(self):
        self.V = []
        self.V.append((numpy.array([0]), 'data1', 0.4))
        self.V.append((numpy.array([1]), 'data2', 0.9))
        self.V.append((numpy.array([2]), 'data3', 1.4))
        self.V.append((numpy.array([3]), 'data4', 2.1))
        self.V.append((numpy.array([4]), 'data5', 0.1))
        self.V.append((numpy.array([5]), 'data6', 8.7))
        self.V.append((numpy.array([6]), 'data7', 3.4))
        self.V.append((numpy.array([7]), 'data8', 2.8))

        self.threshold_filter = DistanceThresholdFilter(1.0)
        self.nearest_filter = NearestFilter(5)
        self.unique = UniqueFilter()

    def test_thresholding(self):
        result = self.threshold_filter.filter_vectors(self.V)
        self.assertEqual(len(result), 3)
        self.assertIn(self.V[0], result)
        self.assertIn(self.V[1], result)
        self.assertIn(self.V[4], result)

    def test_nearest(self):
        result = self.nearest_filter.filter_vectors(self.V)
        self.assertEqual(len(result), 5)
        self.assertIn(self.V[0], result)
        self.assertIn(self.V[1], result)
        self.assertIn(self.V[4], result)
        self.assertIn(self.V[2], result)
        self.assertIn(self.V[3], result)

    def test_unique(self):
        W = self.V
        W.append((numpy.array([7]), 'data8', 2.8))
        W.append((numpy.array([0]), 'data1', 2.8))
        W.append((numpy.array([1]), 'data2', 2.8))
        W.append((numpy.array([6]), 'data7', 2.8))

        result = self.unique.filter_vectors(W)
        self.assertEqual(len(result), 8)
示例#6
0
class TestVectorFilters(unittest.TestCase):

    def setUp(self):
        self.V = []
        self.V.append((numpy.array([0]), 'data1', 0.4))
        self.V.append((numpy.array([1]), 'data2', 0.9))
        self.V.append((numpy.array([2]), 'data3', 1.4))
        self.V.append((numpy.array([3]), 'data4', 2.1))
        self.V.append((numpy.array([4]), 'data5', 0.1))
        self.V.append((numpy.array([5]), 'data6', 8.7))
        self.V.append((numpy.array([6]), 'data7', 3.4))
        self.V.append((numpy.array([7]), 'data8', 2.8))

        self.threshold_filter = DistanceThresholdFilter(1.0)
        self.nearest_filter = NearestFilter(5)
        self.unique = UniqueFilter()

    def test_thresholding(self):
        result = self.threshold_filter.filter_vectors(self.V)
        self.assertEqual(len(result), 3)
        self.assertTrue(self.V[0] in result)
        self.assertTrue(self.V[1] in result)
        self.assertTrue(self.V[4] in result)

    def test_nearest(self):
        result = self.nearest_filter.filter_vectors(self.V)
        self.assertEqual(len(result), 5)
        self.assertTrue(self.V[0] in result)
        self.assertTrue(self.V[1] in result)
        self.assertTrue(self.V[4] in result)
        self.assertTrue(self.V[2] in result)
        self.assertTrue(self.V[3] in result)

    def test_unique(self):
        W = self.V
        W.append((numpy.array([7]), 'data8', 2.8))
        W.append((numpy.array([0]), 'data1', 2.8))
        W.append((numpy.array([1]), 'data2', 2.8))
        W.append((numpy.array([6]), 'data7', 2.8))

        result = self.unique.filter_vectors(W)
        self.assertEqual(len(result), 8)
示例#7
0
    def __init__(self,
                 matrix,
                 max_neighbours=20,
                 lshashes=[RandomBinaryProjections("rbp", 10)],
                 vector_filters=[UniqueFilter()],
                 distance=Pearson()):

        if not isinstance(lshashes, list):

            raise TypeError("'lshashes' must be an instance of 'list'")

        if not isinstance(vector_filters, list):

            raise TypeError("'vector_filters' must be an instance of 'list'")

        self.underlying = Engine(len(matrix[0]),
                                 lshashes=lshashes,
                                 vector_filters=vector_filters +
                                 [NearestFilter(max_neighbours)],
                                 distance=distance)

        for vector in matrix:

            self.underlying.store_vector(vector)
示例#8
0
engine1 = Engine(dimension - 1,
                 lshashes=[rbp, rbp, rbp],
                 storage=MemoryStorage(),
                 distance=EuclideanDistance(),
                 vector_filters=[NearestFilter(100)])

engine1.store_many_vectors(dataBase, [i for i in range(featureNum)])

begin_time = time()
print('        预测值      误差')
err = []
for m in range(len(queryBase)):
    query = queryBase[m]
    N = engine1.neighbours(query,
                           distance='euclidean',
                           fetch_vector_filters=[UniqueFilter()])
    index = [int(x[1]) for x in N]
    # print(index)
    data = np.array([dataBaseInitial.iloc[index, :]])
    data = data[0]
    # print(data.shape)
    query = np.array([queryBase[m]])
    # print(query.shape)
    # 高斯回归
    kernel = C(0.1, (0.001, 0.1)) * RBF(0.5, (1e-4, 10))
    reg = GaussianProcessRegressor(kernel=kernel,
                                   n_restarts_optimizer=10,
                                   alpha=0.01)
    reg.fit(data[:, 1:], data[:, 0])
    output = reg.predict(query)