Exemplo n.º 1
0
    def test_added_descriptor_table_caching(self):
        """
        Mutations of the descriptor table (batch add, single add, remove)
        should each be written through to the backing cache element.
        """
        writable_cache = DataMemoryElement(readonly=False)
        initial_descrs = [random_descriptor() for _ in range(3)]
        table_expected = {d.uuid(): d for d in initial_descrs}

        dset = MemoryDescriptorSet(writable_cache)
        # Nothing has been added yet, so nothing should be cached.
        self.assertTrue(writable_cache.is_empty())

        # Batch-adding descriptors should populate the table and serialize
        # it to the writable cache element.
        dset.add_many_descriptors(initial_descrs)
        self.assertFalse(writable_cache.is_empty())
        self.assertEqual(pickle.loads(dset.cache_element.get_bytes()),
                         table_expected)

        # A single subsequent add should also be reflected in the cache.
        extra_d = random_descriptor()
        table_expected[extra_d.uuid()] = extra_d
        dset.add_descriptor(extra_d)
        self.assertEqual(pickle.loads(dset.cache_element.get_bytes()),
                         table_expected)

        # Removal should likewise be reflected in the cache.
        victim = list(table_expected.values())[0]
        del table_expected[victim.uuid()]
        dset.remove_descriptor(victim.uuid())
        self.assertEqual(pickle.loads(dset.cache_element.get_bytes()),
                         table_expected)
Exemplo n.º 2
0
    def test_add_descriptor(self):
        """
        Each descriptor added individually should be retrievable from the
        internal table under its own UUID.
        """
        dset = MemoryDescriptorSet()
        # Repeat for two independent descriptors to confirm successive adds
        # do not disturb one another.
        for descr in (random_descriptor(), random_descriptor()):
            dset.add_descriptor(descr)
            self.assertEqual(dset._table[descr.uuid()], descr)
Exemplo n.º 3
0
    def test_count(self):
        """
        ``count()`` should start at zero and track both single and batch
        descriptor additions.
        """
        dset = MemoryDescriptorSet()
        self.assertEqual(dset.count(), 0)

        # Single add: 0 -> 1.
        dset.add_descriptor(random_descriptor())
        self.assertEqual(dset.count(), 1)

        # Batch add of three: 1 -> 4.
        batch = [random_descriptor() for _ in range(3)]
        dset.add_many_descriptors(batch)
        self.assertEqual(dset.count(), 4)

        # Another single add after a batch: 4 -> 5.
        dset.add_descriptor(random_descriptor())
        self.assertEqual(dset.count(), 5)
    def test_clustering_equal_descriptors(self):
        """
        Test that groups of identical one-hot descriptors are clustered
        together by the mini-batch k-means build/apply helper.

        Creates ``n_features`` distinct one-hot vectors with
        ``n_descriptors`` duplicate descriptors each, then checks every
        resulting cluster has exactly ``n_descriptors`` members whose
        vectors are all equal.
        """
        print("Creating dummy descriptors")
        n_features = 8
        n_descriptors = 20

        desr_set = MemoryDescriptorSet()
        c = 0
        for i in range(n_features):
            # One-hot vector sized by n_features. (Previously the length
            # was hard-coded to 8 via an uninitialized numpy.ndarray((8,)),
            # which would silently break if n_features were changed.)
            v = numpy.zeros(n_features)
            v[i] = 1
            for _ in range(n_descriptors):
                d = DescriptorMemoryElement('test', c)
                d.set_vector(v)
                desr_set.add_descriptor(d)
                c += 1

        print("Creating test MBKM")
        mbkm = MiniBatchKMeans(n_features,
                               batch_size=12,
                               verbose=True,
                               compute_labels=False,
                               random_state=0)

        # Initial fit with half of desr_set
        d_classes = mb_kmeans_build_apply(desr_set, mbkm, n_descriptors)

        # There should be n_descriptors descriptors per cluster.
        for c in d_classes:
            self.assertEqual(
                len(d_classes[c]), n_descriptors,
                "Cluster %s did not have expected number of descriptors "
                "(%d != %d)" % (c, n_descriptors, len(d_classes[c])))

            # Each descriptor in each cluster should be equal to the other
            # descriptors in that cluster
            uuids = list(d_classes[c])
            v = desr_set[uuids[0]].vector()
            for uuid in uuids[1:]:
                v2 = desr_set[uuid].vector()
                numpy.testing.assert_array_equal(
                    v, v2, "vector in cluster %d did not "
                    "match other vectors "
                    "(%s != %s)" % (c, v, v2))