Example No. 1
def demodata_mtest_infr(state='empty'):
    import ibeis
    import utool as ut  # needed for ut.shuffle and ut.flatten below
    ibs = ibeis.opendb(db='PZ_MTEST')
    annots = ibs.annots()
    names = list(annots.group_items(annots.nids).values())
    ut.shuffle(names, rng=321)
    test_aids = ut.flatten(names[1::2])
    infr = ibeis.AnnotInference(ibs, test_aids, autoinit=True)
    infr.reset(state=state)
    return infr
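A minimal usage sketch for the helper above, assuming ibeis and its PZ_MTEST demo database are installed; the attribute access below is an assumption, not part of the original snippet:

# Build a fresh inference graph over half of the PZ_MTEST names
infr = demodata_mtest_infr(state='empty')
# assumption: AnnotInference keeps the initializing annotation ids on infr.aids
print(len(infr.aids))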
Example No. 2
def randomized_ibeis_dset(dbname, dim=224):
    """
    Ignore:
        >>> from clab.live.siam_train import *
        >>> datasets = randomized_ibeis_dset('PZ_MTEST')
        >>> ut.qtensure()
        >>> self = datasets['train']
        >>> self.augment = True
        >>> self.show_sample()
    """
    # from clab.live.siam_train import *
    # dbname = 'PZ_MTEST'
    import utool as ut
    import ubelt as ub  # needed for ub.group_items below
    from ibeis.algo.verif import vsone
    # pblm = vsone.OneVsOneProblem.from_empty('PZ_MTEST')
    pblm = vsone.OneVsOneProblem.from_empty(dbname)

    pccs = list(pblm.infr.positive_components())
    pcc_freq = list(map(len, pccs))
    freq_grouped = ub.group_items(pccs, pcc_freq)

    # Simple, heavily randomized sampling strategy
    train_pccs = []
    vali_pccs = []
    test_pccs = []
    import math

    # vali_frac = .1
    test_frac = .1
    vali_frac = 0

    # freq_grouped maps PCC size -> list of PCCs of that size
    for i, group in freq_grouped.items():
        group = ut.shuffle(group, rng=432232 + i)
        n_test = 0 if len(group) == 1 else math.ceil(len(group) * test_frac)
        test, learn = group[:n_test], group[n_test:]
        n_vali = 0 if len(group) == 1 else math.ceil(len(learn) * vali_frac)
        # Split the remaining learn pool (not the full group) into vali/train
        vali, train = learn[:n_vali], learn[n_vali:]
        train_pccs.extend(train)
        test_pccs.extend(test)
        vali_pccs.extend(vali)

    test_dataset = RandomBalancedIBEISSample(pblm, test_pccs, dim=dim)
    train_dataset = RandomBalancedIBEISSample(pblm, train_pccs, dim=dim)
    vali_dataset = RandomBalancedIBEISSample(pblm, vali_pccs, dim=dim)
    train_dataset.augment = True

    datasets = {
        'train': train_dataset,
        # 'vali': vali_dataset,
        'test': test_dataset,
    }
    return datasets
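A hedged usage sketch for randomized_ibeis_dset, assuming RandomBalancedIBEISSample behaves like a standard torch Dataset (not shown in this snippet) and that the PZ_MTEST database is available:

import torch.utils.data

datasets = randomized_ibeis_dset('PZ_MTEST', dim=224)
# Wrap the augmented training split in a DataLoader for batched iteration
loader = torch.utils.data.DataLoader(datasets['train'], batch_size=32, shuffle=True)
batch = next(iter(loader))  # one batch is enough to sanity-check shapes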
Example No. 3
    def _precollect(self):
        """
        Sets up an ibs object with an aids_pool

        Example:
            >>> # DISABLE_DOCTEST
            >>> from wbia.scripts.thesis import *
            >>> self = Chap3('humpbacks_fb')
            >>> self = Chap3('GZ_Master1')
            >>> self = Chap3('GIRM_Master1')
            >>> self = Chap3('PZ_MTEST')
            >>> self = Chap3('PZ_PB_RF_TRAIN')
            >>> self = Chap3('PZ_Master1')
            >>> self = Chap3('RotanTurtles')
            >>> self._precollect()

            >>> from wbia.scripts.thesis import *
            >>> self = Chap4('PZ_Master1')
            >>> self._precollect()
        """
        import wbia
        from wbia.init import main_helpers

        self.dbdir = wbia.sysres.lookup_dbdir(self.dbname)
        ibs = wbia.opendb(dbdir=self.dbdir)
        if ibs.dbname.startswith('PZ_PB_RF_TRAIN'):
            aids = ibs.get_valid_aids()
        elif ibs.dbname.startswith('LF_ALL'):
            aids = ibs.get_valid_aids()
        elif ibs.dbname.startswith('PZ_Master'):
            # PZ_Master is too big to run in full.  Select a smaller sample.
            # Be sure to include photobomb and incomparable cases.
            aids = ibs.filter_annots_general(require_timestamp=True,
                                             species='primary',
                                             is_known=True,
                                             minqual='poor')
            infr = wbia.AnnotInference(ibs=ibs, aids=aids)
            infr.reset_feedback('staging', apply=True)
            minority_ccs = find_minority_class_ccs(infr)
            minority_aids = set(ut.flatten(minority_ccs))

            # We need to do our best to select a small sample here
            flags = [
                'left' in text for text in ibs.annots(aids).viewpoint_code
            ]
            left_aids = ut.compress(aids, flags)

            majority_aids = set(
                ibs.filter_annots_general(
                    left_aids,
                    require_timestamp=True,
                    species='primary',
                    minqual='poor',
                    require_quality=True,
                    min_pername=2,
                    max_pername=15,
                ))
            # This produces 5720 annotations
            aids = sorted(majority_aids.union(minority_aids))
        else:
            aids = ibs.filter_annots_general(require_timestamp=True,
                                             is_known=True,
                                             species='primary',
                                             minqual='poor')

        if ibs.dbname.startswith('MantaMatcher'):
            # Remove some of the singletons for this db
            annots = ibs.annots(aids)
            names = annots.group2(annots.nids)
            multis = [aids for aids in names if len(aids) > 1]
            singles = [aids for aids in names if len(aids) == 1]
            rng = np.random.RandomState(3988708794)
            aids = ut.flatten(multis)
            aids += ut.shuffle(ut.flatten(singles), rng=rng)[0:358]

        # ibs.print_annot_stats(aids, prefix='P')
        main_helpers.monkeypatch_encounters(ibs, aids, minutes=30)
        logger.info('post monkey patch')
        # if False:
        #     ibs.print_annot_stats(aids, prefix='P')
        self.ibs = ibs
        self.aids_pool = aids
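Per the docstring above, _precollect is invoked on a chapter object keyed by database name. A minimal sketch, assuming the PZ_MTEST database is present:

from wbia.scripts.thesis import Chap3

self = Chap3('PZ_MTEST')
self._precollect()
# After collection the object carries the opened controller and the annotation pool
print(len(self.aids_pool))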
Example No. 4
def att_faces_datasets(dim=224):
    """
    https://github.com/harveyslash/Facial-Similarity-with-Siamese-Networks-in-Pytorch

        >>> from clab.live.siam_train import *
        >>> train_dataset, vali_dataset, test_dataset = att_faces_datasets()
        train_dataset[0][0].shape

        fpath = train_dataset.img1_fpaths[0]
    """
    import os
    import ubelt as ub  # needed for ub.grabdata, ub.group_items, and ub.dict_subset below

    def ensure_att_dataset():
        def unzip(zip_fpath, dpath=None, verbose=1):
            """
            Extracts all members of a zipfile.

            Args:
                zip_fpath (str): path of zip file to unzip.
                dpath (str): directory to unzip to. If not specified, it defaults
                    to a folder parallel to the zip file (excluding the extension).
                verbose (int): verbosity level
            """
            import zipfile
            from os.path import splitext
            from ubelt import progiter
            if dpath is None:
                dpath = splitext(zip_fpath)[0]
            with zipfile.ZipFile(zip_fpath, 'r') as zf:
                members = zf.namelist()
                prog = progiter.ProgIter(members,
                                         verbose=verbose,
                                         label='unzipping')
                for zipinfo in prog:
                    zf.extract(zipinfo, path=dpath, pwd=None)
            return dpath

        faces_zip_fpath = ub.grabdata(
            'http://www.cl.cam.ac.uk/Research/DTG/attarchive/pub/data/att_faces.zip'
        )
        from os.path import splitext
        dpath = splitext(faces_zip_fpath)[0]
        if not os.path.exists(dpath):
            dpath = unzip(faces_zip_fpath, dpath=dpath)
        return dpath

    # Download the data if you don't have it
    dpath = ensure_att_dataset()

    import torchvision.datasets
    torchvision.datasets.folder.IMG_EXTENSIONS += ['.pgm']
    im_dset = torchvision.datasets.ImageFolder(root=dpath)
    class_to_id = ub.group_items(*zip(*im_dset.imgs))

    import utool as ut
    names = sorted(list(class_to_id.keys()))
    names = ut.shuffle(names, rng=10)
    learn, test = names[:40], names[40:]
    train, vali = learn[:35], learn[35:]
    print('train = {!r}'.format(len(train)))
    print('vali = {!r}'.format(len(vali)))
    print('test = {!r}'.format(len(test)))

    train_dataset = LabeledPairDataset(
        *pair_sampler(ub.dict_subset(class_to_id, train)), dim=dim)
    vali_dataset = LabeledPairDataset(
        *pair_sampler(ub.dict_subset(class_to_id, vali)), dim=dim)
    test_dataset = LabeledPairDataset(
        *pair_sampler(ub.dict_subset(class_to_id, test)), dim=dim)
    print('train_dataset = {!r}'.format(len(train_dataset)))
    print('vali_dataset = {!r}'.format(len(vali_dataset)))
    print('test_dataset = {!r}'.format(len(test_dataset)))
    return train_dataset, vali_dataset, test_dataset
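A short usage sketch mirroring the docstring; it assumes the AT&T faces zip is downloadable and that dataset samples support the indexing shown in the docstring:

train_dataset, vali_dataset, test_dataset = att_faces_datasets(dim=224)
# Per the docstring, the first element of a sample exposes a .shape
sample = train_dataset[0]
print(sample[0].shape)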
Example No. 5
    def find_neg_augment_edges(infr, cc1, cc2, k=None):
        """
        Find enough edges between two PCCs to make them k-negative complete.
        The two CCs should be disjoint and not have any positive edges between
        them.

        Args:
            cc1 (set): nodes in one PCC
            cc2 (set): nodes in another positive-disjoint PCC
            k (int): redundancy level (if None, uses infr.params['redun.neg'])

        Example:
            >>> # DISABLE_DOCTEST
            >>> from wbia.algo.graph import demo
            >>> k = 2
            >>> cc1, cc2 = {1}, {2, 3}
            >>> # --- return an augmentation if feasible
            >>> infr = demo.demodata_infr(ccs=[cc1, cc2], ignore_pair=True)
            >>> edges = set(infr.find_neg_augment_edges(cc1, cc2, k=k))
            >>> assert edges == {(1, 2), (1, 3)}
            >>> # --- if infeasible return a partial augmentation
            >>> infr.add_feedback((1, 2), INCMP)
            >>> edges = set(infr.find_neg_augment_edges(cc1, cc2, k=k))
            >>> assert edges == {(1, 3)}
        """
        if k is None:
            k = infr.params['redun.neg']
        assert cc1 is not cc2, 'CCs should be disjoint (but they are the same)'
        assert len(cc1.intersection(cc2)) == 0, 'CCs should be disjoint'
        existing_edges = set(nxu.edges_cross(infr.graph, cc1, cc2))

        reviewed_edges = {
            edge: state
            for edge, state in zip(
                existing_edges, infr.edge_decision_from(existing_edges)
            )
            if state != UNREV
        }

        # Find how many negative edges we already have
        num = sum([state == NEGTV for state in reviewed_edges.values()])
        if num < k:
            # Find k random negative edges
            check_edges = existing_edges - set(reviewed_edges)
            # Check the existing but unreviewed edges first
            for edge in check_edges:
                num += 1
                yield edge
                if num >= k:
                    return
            # Check non-existing edges next
            seed = 2827295125
            try:
                seed += sum(cc1) + sum(cc2)
            except Exception:
                pass
            rng = np.random.RandomState(seed)
            cc1 = ut.shuffle(list(cc1), rng=rng)
            cc2 = ut.shuffle(list(cc2), rng=rng)
            cc1 = ut.shuffle(list(cc1), rng=rng)
            for edge in it.starmap(nxu.e_, nxu.diag_product(cc1, cc2)):
                if edge not in existing_edges:
                    num += 1
                    yield edge
                    if num >= k:
                        return