Пример #1
0
    def test_get_descriptors(self):
        # Populate a fresh in-memory index with five random descriptors, then
        # check that they can be fetched back singly and in bulk by UUID.
        descrs = [random_descriptor() for _ in range(5)]
        index = MemoryDescriptorIndex()
        index.add_many_descriptors(descrs)

        # Single lookup: the element stored under descrs[1]'s UUID must
        # compare equal to descrs[1].
        expected_single = descrs[1]
        fetched_single = index.get_descriptor(expected_single.uuid())
        ntools.assert_equal(fetched_single, expected_single)

        # Bulk lookup: ask for two UUIDs and expect exactly those two
        # elements back. Compared as sets, so return order is not checked.
        wanted = [descrs[0], descrs[3]]
        wanted_uuids = [d.uuid() for d in wanted]
        fetched_many = list(index.get_many_descriptors(wanted_uuids))
        ntools.assert_equal(len(fetched_many), 2)
        ntools.assert_equal(set(fetched_many), set(wanted))
Пример #2
0
    def test_get_descriptors(self):
        # Round-trip check: descriptors added to a MemoryDescriptorIndex can
        # be retrieved by UUID, one at a time and several at once.
        descrs = [random_descriptor() for _ in range(5)]
        index = MemoryDescriptorIndex()
        index.add_many_descriptors(descrs)

        # One UUID in, one matching descriptor out.
        target = descrs[1]
        ntools.assert_equal(index.get_descriptor(target.uuid()), target)

        # Two UUIDs in, the two matching descriptors out (order-insensitive).
        first, fourth = descrs[0], descrs[3]
        requested = [first.uuid(), fourth.uuid()]
        results = list(index.get_many_descriptors(requested))
        ntools.assert_equal(len(results), 2)
        ntools.assert_equal(set(results), {first, fourth})
Пример #3
0
    log.info("Classifying phone imagery descriptors")
    # Collect the index's keys into a set once, so each per-phone
    # intersection below is a plain set operation.
    descriptor_index_shas = set(descriptor_index.iterkeys())
    n_phones = len(phone2shas)
    for i, p in enumerate(phone2shas, 1):
        log.info('%s (%d / %d)', p, i, n_phones)
        # Not all source "images" have descriptors since some URLs returned
        # non-image files. Intersect phone sha's with what was actually
        # computed. Raise if this reduces descriptors for classification to
        # zero.
        indexed_shas = set(phone2shas[p]) & descriptor_index_shas
        if not indexed_shas:
            raise RuntimeError(
                "Phone number '%s' has no valid images associated "
                "with it.\nBefore:\n%s\n\nAfter:\n%s" %
                (p, phone2shas[p], indexed_shas))

        # NOTE(review): the shas are unpacked into positional arguments here;
        # confirm this matches the descriptor index API in use (it may expect
        # a single iterable of UUIDs instead).
        descriptor_elems = descriptor_index.get_many_descriptors(*indexed_shas)
        e2c = classifier.classify_async(descriptor_elems,
                                        c_factory,
                                        use_multiprocessing=True,
                                        ri=1.)
        # Only the 'positive' entry of each classification result is used.
        pos_scores = [c['positive'] for c in e2c.values()]

        # Max of pool
        phone2score[p] = max(pos_scores)

    log.info("Saving score map")
    # Open the output through a context manager so the file is flushed and
    # closed deterministically, even if writing fails part-way.
    with open(PHONE2SCORE_OUTPUT_FILEPATH, 'w') as score_file:
        csv.writer(score_file).writerows(sorted(phone2score.iteritems()))
Пример #4
0
    log.info("Classifying phone imagery descriptors")
    # Key set of the descriptor index, built once up front and reused for
    # every phone number in the loop.
    descriptor_index_shas = set(descriptor_index.iterkeys())
    total_phones = len(phone2shas)
    for i, p in enumerate(phone2shas, 1):
        log.info('%s (%d / %d)', p, i, total_phones)
        # Not all source "images" have descriptors since some URLs returned
        # non-image files. Intersect phone sha's with what was actually
        # computed. Raise if this reduces descriptors for classification to
        # zero.
        indexed_shas = set(phone2shas[p]) & descriptor_index_shas
        if not indexed_shas:
            raise RuntimeError(
                "Phone number '%s' has no valid images associated "
                "with it.\nBefore:\n%s\n\nAfter:\n%s"
                % (p, phone2shas[p], indexed_shas))

        # NOTE(review): shas are passed as unpacked positional arguments;
        # verify against the descriptor index API in use, which may take a
        # single iterable of UUIDs.
        descriptor_elems = descriptor_index.get_many_descriptors(*indexed_shas)
        e2c = classifier.classify_async(descriptor_elems, c_factory,
                                        use_multiprocessing=True, ri=1.)
        # Pull the 'positive' entry out of every classification result.
        pos_scores = [c['positive'] for c in e2c.values()]

        # Max of pool
        phone2score[p] = max(pos_scores)

    log.info("Saving score map")
    # The context manager guarantees the CSV file handle is closed (and its
    # buffer flushed) once the rows are written or an error occurs.
    with open(PHONE2SCORE_OUTPUT_FILEPATH, 'w') as score_file:
        csv.writer(score_file).writerows(sorted(phone2score.iteritems()))


# Module-level completion message.
log.info("Done")