def main():
    args = cli_parser().parse_args()
    config = utility_main_helper(default_config, args)
    log = logging.getLogger(__name__)

    output_filepath = args.output_map
    if not output_filepath:
        raise ValueError("No path given for output map file (pickle).")

    #: :type: smqtk.representation.DescriptorIndex
    index = from_plugin_config(config['descriptor_index'],
                               get_descriptor_index_impls())
    mbkm = MiniBatchKMeans(verbose=args.verbose,
                           compute_labels=False,
                           **config['minibatch_kmeans_params'])
    initial_fit_size = int(config['initial_fit_size'])

    d_classes = mb_kmeans_build_apply(index, mbkm, initial_fit_size)

    log.info("Saving KMeans centroids to: %s",
             config['centroids_output_filepath_npy'])
    numpy.save(config['centroids_output_filepath_npy'], mbkm.cluster_centers_)

    log.info("Saving result classification map to: %s", output_filepath)
    safe_create_dir(os.path.dirname(output_filepath))
    with open(output_filepath, 'w') as f:
        cPickle.dump(d_classes, f, -1)

    log.info("Done")
def main():
    args = cli_parser().parse_args()
    config = utility_main_helper(default_config, args)
    log = logging.getLogger(__name__)

    output_filepath = args.output_map
    if not output_filepath:
        raise ValueError("No path given for output map file (pickle).")

    #: :type: smqtk.representation.DescriptorIndex
    index = from_plugin_config(config['descriptor_index'],
                               get_descriptor_index_impls())
    mbkm = MiniBatchKMeans(verbose=args.verbose,
                           compute_labels=False,
                           **config['minibatch_kmeans_params'])
    initial_fit_size = int(config['initial_fit_size'])

    d_classes = mb_kmeans_build_apply(index, mbkm, initial_fit_size)

    log.info("Saving KMeans centroids to: %s",
             config['centroids_output_filepath_npy'])
    numpy.save(config['centroids_output_filepath_npy'], mbkm.cluster_centers_)

    log.info("Saving result classification map to: %s", output_filepath)
    safe_create_dir(os.path.dirname(output_filepath))
    with open(output_filepath, 'w') as f:
        cPickle.dump(d_classes, f, -1)

    log.info("Done")
    def test_clustering_equal_descriptors(self):
        # Test that clusters of descriptor of size  n-features are correctly
        # clustered together.
        print("Creating dummy descriptors")
        n_features = 8
        n_descriptors = 20

        index = MemoryDescriptorIndex()
        c = 0
        for i in range(n_features):
            v = numpy.ndarray((8,))
            v[...] = 0
            v[i] = 1
            for j in range(n_descriptors):
                d = DescriptorMemoryElement('test', c)
                d.set_vector(v)
                index.add_descriptor(d)
                c += 1

        print("Creating test MBKM")
        mbkm = MiniBatchKMeans(n_features, batch_size=12, verbose=True,
                               compute_labels=False, random_state=0)

        # Initial fit with half of index
        d_classes = mb_kmeans_build_apply(index, mbkm, n_descriptors)

        # There should be 20 descriptors per class
        for c in d_classes:
            self.assertEqual(
                len(d_classes[c]),
                n_descriptors,
                "Cluster %s did not have expected number of descriptors "
                "(%d != %d)"
                % (c, n_descriptors, len(d_classes[c]))
            )

            # Each descriptor in each cluster should be equal to the other
            # descriptors in that cluster
            uuids = list(d_classes[c])
            v = index[uuids[0]].vector()
            for uuid in uuids[1:]:
                v2 = index[uuid].vector()
                numpy.testing.assert_array_equal(v, v2,
                                                 "vector in cluster %d did not "
                                                 "match other vectors "
                                                 "(%s != %s)"
                                                 % (c, v, v2))
    def test_clustering_equal_descriptors(self):
        # Test that clusters of descriptor of size  n-features are correctly
        # clustered together.
        print "Creating dummy descriptors"
        n_features = 8
        n_descriptors = 20

        index = MemoryDescriptorIndex()
        c = 0
        for i in range(n_features):
            v = numpy.ndarray(8)
            v[...] = 0
            v[i] = 1
            for j in range(n_descriptors):
                d = DescriptorMemoryElement('test', c)
                d.set_vector(v)
                index.add_descriptor(d)
                c += 1

        print "Creating test MBKM"
        mbkm = MiniBatchKMeans(n_features,
                               batch_size=12,
                               verbose=True,
                               compute_labels=False,
                               random_state=0)

        # Initial fit with half of index
        d_classes = mb_kmeans_build_apply(index, mbkm, n_descriptors)

        # There should be 20 descriptors per class
        for c in d_classes:
            nose.tools.assert_equal(
                len(d_classes[c]), n_descriptors,
                "Cluster %s did not have expected number of descriptors "
                "(%d != %d)" % (c, n_descriptors, len(d_classes[c])))

            # Each descriptor in each cluster should be equal to the other
            # descriptors in that cluster
            uuids = list(d_classes[c])
            v = index[uuids[0]].vector()
            for uuid in uuids[1:]:
                v2 = index[uuid].vector()
                numpy.testing.assert_array_equal(
                    v, v2, "vector in cluster %d did not "
                    "match other vectors "
                    "(%s != %s)" % (c, v, v2))
 def test_empty_index(self):
     # what happens when function given an empty descriptor index
     index = MemoryDescriptorIndex()
     mbkm = MiniBatchKMeans()
     d = mb_kmeans_build_apply(index, mbkm, 0)
     nose.tools.assert_false(d)
 def test_empty_index(self):
     # what happens when function given an empty descriptor index
     index = MemoryDescriptorIndex()
     mbkm = MiniBatchKMeans()
     d = mb_kmeans_build_apply(index, mbkm, 0)
     self.assertFalse(d)
 def test_emptyset(self):
     # what happens when function given an empty descriptor set
     descr_set = MemoryDescriptorSet()
     mbkm = MiniBatchKMeans()
     d = mb_kmeans_build_apply(descr_set, mbkm, 0)
     self.assertFalse(d)