Пример #1
0
    # Set up the three collaborators used below: the descriptor index, the
    # classifier plugin and the factory for classification result elements.
    log.info("Loading plugins")
    # The index is constructed with DESCRIPTOR_INDEX_FILE_CACHE as its file
    # cache.
    descriptor_index = MemoryDescriptorIndex(
        file_cache=DESCRIPTOR_INDEX_FILE_CACHE)
    # The classifier implementation is selected from the 'plugins' ->
    # 'classifier' section of the configuration.
    #: :type: smqtk.algorithms.Classifier
    classifier = from_plugin_config(classifier_config['plugins']['classifier'],
                                    get_classifier_impls())
    # Factory producing MemoryClassificationElement instances; the second
    # argument is an empty dict (presumably the element configuration --
    # confirm against ClassificationElementFactory).
    c_factory = ClassificationElementFactory(MemoryClassificationElement, {})

    # Mapping of phone number to the SHA1 values associated with it.
    # NOTE(review): the file object returned by open() is not explicitly
    # closed here; consider a ``with`` block.
    #: :type: dict[str, list[str]]
    phone2shas = json.load(open(PHONE_SHA1_JSON))
    # Starts empty; presumably filled with a score per phone number further
    # down (not visible in this fragment).
    #: :type: dict[str, float]
    phone2score = {}

    log.info("Classifying phone imagery descriptors")
    # Progress counter, reported 1-based in the per-phone log line below.
    i = 0
    # Collect the index keys once, before the loop, so that each phone only
    # needs a set intersection against them.
    descriptor_index_shas = set(descriptor_index.iterkeys())
    for p in phone2shas:
        log.info('%s (%d / %d)', p, i + 1, len(phone2shas))
        # Not all source "images" have descriptors since some URLs returned
        # non-image files. Intersect phone sha's with what was actually
        # computed. Raise if this reduces descriptors for classification to
        # zero.
        indexed_shas = set(phone2shas[p]) & descriptor_index_shas
        if not indexed_shas:
            raise RuntimeError(
                "Phone number '%s' has no valid images associated "
                "with it.\nBefore:\n%s\n\nAfter:\n%s" %
                (p, phone2shas[p], indexed_shas))

        # Fetch the descriptor elements for the SHA1 values that are present
        # in the index.
        descriptor_elems = descriptor_index.get_many_descriptors(*indexed_shas)
        e2c = classifier.classify_async(descriptor_elems,
                                        c_factory,
Пример #2
0
 def test_iterkeys(self):
     # After adding 100 random descriptors to a fresh index, the keys it
     # iterates over must be exactly the UUIDs of those descriptors.
     index = MemoryDescriptorIndex()
     descriptors = [random_descriptor() for _ in range(100)]
     index.add_many_descriptors(descriptors)

     actual_keys = set(index.iterkeys())
     expected_keys = {descriptor.uuid() for descriptor in descriptors}
     ntools.assert_equal(actual_keys, expected_keys)
Пример #3
0
 def test_iterkeys(self):
     # After adding 100 random descriptors to a fresh index, the keys it
     # iterates over must be exactly the UUIDs of those descriptors.
     i = MemoryDescriptorIndex()
     # ``range`` instead of ``xrange``: ``xrange`` is undefined on Python 3,
     # and for 100 items ``range`` behaves the same on Python 2.
     descrs = [random_descriptor() for _ in range(100)]
     i.add_many_descriptors(descrs)
     ntools.assert_equal(set(i.iterkeys()),
                         set(d.uuid() for d in descrs))
Пример #4
0
    # Set up the three collaborators used below: the descriptor index, the
    # classifier plugin and the factory for classification result elements.
    log.info("Loading plugins")
    descriptor_index = MemoryDescriptorIndex(
        file_cache=DESCRIPTOR_INDEX_FILE_CACHE)
    #: :type: smqtk.algorithms.Classifier
    classifier = from_plugin_config(classifier_config['plugins']['classifier'],
                                    get_classifier_impls())
    c_factory = ClassificationElementFactory(MemoryClassificationElement, {})

    # Read the phone number -> SHA1 list mapping inside a context manager so
    # the file handle is closed as soon as parsing finishes instead of being
    # left open until it is garbage collected.
    with open(PHONE_SHA1_JSON) as phone_sha1_file:
        #: :type: dict[str, list[str]]
        phone2shas = json.load(phone_sha1_file)
    #: :type: dict[str, float]
    phone2score = {}

    log.info("Classifying phone imagery descriptors")
    # Progress counter, reported 1-based in the per-phone log line below.
    i = 0
    # Collect the index keys once, before the loop, so that each phone only
    # needs a set intersection against them.
    descriptor_index_shas = set(descriptor_index.iterkeys())
    for p in phone2shas:
        log.info('%s (%d / %d)', p, i + 1, len(phone2shas))
        # Not all source "images" have descriptors since some URLs returned
        # non-image files. Intersect phone sha's with what was actually
        # computed. Raise if this reduces descriptors for classification to
        # zero.
        indexed_shas = set(phone2shas[p]) & descriptor_index_shas
        if not indexed_shas:
            raise RuntimeError(
                "Phone number '%s' has no valid images associated "
                "with it.\nBefore:\n%s\n\nAfter:\n%s"
                % (p, phone2shas[p], indexed_shas))

        # Fetch the descriptor elements for the SHA1 values that are present
        # in the index and hand them to the classifier.
        descriptor_elems = descriptor_index.get_many_descriptors(*indexed_shas)
        # NOTE(review): the meaning of ``ri=1.`` is defined by
        # ``classify_async`` and is not visible here -- confirm.
        e2c = classifier.classify_async(descriptor_elems, c_factory,
                                        use_multiprocessing=True, ri=1.)