Example #1
    def setup(self):
        # These scores are achieved for the following evaluation parameters.
        # class_weights='auto', C from 3 ** -2 .. 3 ** 8, and one shuffle
        # split with test size of 0.25.
        self.expected_scores = {'fv': 55.226, 'fv_sfv': 56.718, 'bow': 42.896}

        src_cfg = 'hollywood2_clean'
        nr_clusters = 100

        self.dataset = Dataset(src_cfg,
                               ip_type='dense5.track15mbh',
                               suffix='.original',
                               nr_clusters=nr_clusters)
        sstats_folder = self.dataset.SSTATS_DIR
        gmm_fn = self.dataset.GMM

        self.tr_fn = os.path.join(sstats_folder, 'train_1_1_1_0.dat')
        self.sp_tr_fn = os.path.join(sstats_folder,
                                     'spatial_train_1_1_1_0.dat')
        self.tr_labels_fn = os.path.join(sstats_folder, 'labels_train.info')
        self.te_fn = os.path.join(sstats_folder, 'test_1_1_1_0.dat')
        self.sp_te_fn = os.path.join(sstats_folder, 'spatial_test_1_1_1_0.dat')
        self.te_labels_fn = os.path.join(sstats_folder, 'labels_test.info')

        self.gmm = load_gmm(gmm_fn)

        with open(self.tr_labels_fn, 'rb') as labels_file:
            tr_labels = pickle.load(labels_file)
        with open(self.te_labels_fn, 'rb') as labels_file:
            te_labels = pickle.load(labels_file)

        self.cx = tr_labels
        self.cy = te_labels
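
For context, here is a sketch of a test method that could consume this fixture, reusing the Model/Evaluation API visible in Examples #5 and #6. The method name, the restriction to the 'fv' and 'bow' models (the 'fv_sfv' score would also involve the spatial files), and the 0.1 tolerance are assumptions, not part of the original suite.

    def test_scores(self):
        # Hypothetical check against the expected scores recorded in setup().
        for model_type in ('fv', 'bow'):
            model = Model(model_type, self.gmm)
            model.compute_kernels([self.tr_fn], [self.te_fn])
            Kxx, Kyx = model.get_kernels()
            evaluation = Evaluation(self.dataset.DATASET)
            score = evaluation.fit(Kxx, self.cx).score(Kyx, self.cy)
            assert abs(score - self.expected_scores[model_type]) < 0.1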
Example #2
    def setup(self):
        # These scores are achieved for the following evaluation parameters.
        # class_weights='auto', C from 3 ** -2 .. 3 ** 8, and one shuffle
        # split with test size of 0.25.
        self.expected_scores = {'fv': 55.226,
                                'fv_sfv': 56.718,
                                'bow': 42.896}

        src_cfg = 'hollywood2_clean'
        nr_clusters = 100

        self.dataset = Dataset(src_cfg, ip_type='dense5.track15mbh',
                               suffix='.original', nr_clusters=nr_clusters)
        sstats_folder = self.dataset.SSTATS_DIR
        gmm_fn = self.dataset.GMM

        self.tr_fn = os.path.join(sstats_folder, 'train_1_1_1_0.dat')
        self.sp_tr_fn = os.path.join(sstats_folder,
                                     'spatial_train_1_1_1_0.dat')
        self.tr_labels_fn = os.path.join(sstats_folder, 'labels_train.info')
        self.te_fn = os.path.join(sstats_folder, 'test_1_1_1_0.dat')
        self.sp_te_fn = os.path.join(sstats_folder, 'spatial_test_1_1_1_0.dat')
        self.te_labels_fn = os.path.join(sstats_folder, 'labels_test.info')

        self.gmm = load_gmm(gmm_fn)

        with open(self.tr_labels_fn, 'rb') as labels_file:
            tr_labels = pickle.load(labels_file)
        with open(self.te_labels_fn, 'rb') as labels_file:
            te_labels = pickle.load(labels_file)

        self.cx = tr_labels
        self.cy = te_labels
Example #3
def master(src_cfg, suffix_in, suffix_out, K, N, nr_processes, double_norm):
    D = 64

    dataset = Dataset(src_cfg, nr_clusters=K)
    samples = [str(sample) for sample in dataset.get_data('train')[0] +
               dataset.get_data('test')[0]]

    if double_norm:
        worker = double_normalization
        suffix = '.double_norm'
        gmm = load_gmm(
            os.path.join(
                dataset.FEAT_DIR + suffix_in, 'gmm',
                'gmm_%d' % K))
    else:
        worker = merge
        suffix = ''
        gmm = None

    path_in = os.path.join(
        dataset.FEAT_DIR + suffix_in,
        'statistics_k_%d' % dataset.VOC_SIZE, 'stats.tmp')
    path_out = os.path.join(
        dataset.FEAT_DIR + suffix_out,
        'statistics_k_%d' % dataset.VOC_SIZE, 'stats.tmp' + suffix)

    sstats_in = SstatsMap(path_in)
    sstats_out = SstatsMap(path_out)

    # One sufficient-statistics vector holds VOC_SIZE posterior sums plus
    # D first- and D second-order moments per mixture component.
    len_sstats = dataset.VOC_SIZE + 2 * D * dataset.VOC_SIZE

    kwargs = {
        'N': N,
        'sstats_in': sstats_in,
        'sstats_out': sstats_out,
        'len_sstats': len_sstats,
        'gmm': gmm}

    if nr_processes > 1:
        # Ceiling division, so the worker chunks cover every sample.
        nr_samples_per_process = (len(samples) + nr_processes - 1) // nr_processes
        for ii in xrange(nr_processes):
            mp.Process(target=worker,
                       args=(samples[
                           ii * nr_samples_per_process:
                           (ii + 1) * nr_samples_per_process], ),
                       kwargs=kwargs).start()
    else:
        worker(samples, **kwargs)
Example #4
def master(src_cfg, suffix_in, suffix_out, K, N, nr_processes, double_norm):
    D = 64

    dataset = Dataset(src_cfg, nr_clusters=K)
    samples = [
        str(sample) for sample in dataset.get_data('train')[0] +
        dataset.get_data('test')[0]
    ]

    if double_norm:
        worker = double_normalization
        suffix = '.double_norm'
        gmm = load_gmm(
            os.path.join(dataset.FEAT_DIR + suffix_in, 'gmm', 'gmm_%d' % K))
    else:
        worker = merge
        suffix = ''
        gmm = None

    path_in = os.path.join(dataset.FEAT_DIR + suffix_in,
                           'statistics_k_%d' % dataset.VOC_SIZE, 'stats.tmp')
    path_out = os.path.join(dataset.FEAT_DIR + suffix_out,
                            'statistics_k_%d' % dataset.VOC_SIZE,
                            'stats.tmp' + suffix)

    sstats_in = SstatsMap(path_in)
    sstats_out = SstatsMap(path_out)

    # One sufficient-statistics vector holds VOC_SIZE posterior sums plus
    # D first- and D second-order moments per mixture component.
    len_sstats = dataset.VOC_SIZE + 2 * D * dataset.VOC_SIZE

    kwargs = {
        'N': N,
        'sstats_in': sstats_in,
        'sstats_out': sstats_out,
        'len_sstats': len_sstats,
        'gmm': gmm
    }

    if nr_processes > 1:
        # Ceiling division, so the worker chunks cover every sample.
        nr_samples_per_process = (len(samples) + nr_processes - 1) // nr_processes
        for ii in xrange(nr_processes):
            mp.Process(target=worker,
                       args=(samples[ii * nr_samples_per_process:(ii + 1) *
                                     nr_samples_per_process], ),
                       kwargs=kwargs).start()
    else:
        worker(samples, **kwargs)
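
A minimal, illustrative way to invoke master as a script; the configuration values simply mirror the other examples ('hollywood2_clean', K=100, suffix '.original'), and the choices of N and nr_processes are arbitrary, not prescribed by the source.

if __name__ == '__main__':
    # Hypothetical invocation; adjust the configuration to the dataset at hand.
    master('hollywood2_clean', suffix_in='.original', suffix_out='.original',
           K=100, N=1, nr_processes=4, double_norm=True)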
Example #5
def evaluate_given_dataset(dataset, **kwargs):
    model_type = kwargs.get('model_type', 'fv')

    sstats_folder = dataset.SSTATS_DIR

    tr_fn = os.path.join(sstats_folder, 'train.dat')
    tr_labels_fn = os.path.join(sstats_folder, 'labels_train.info')

    te_fn = os.path.join(sstats_folder, 'test.dat')
    te_labels_fn = os.path.join(sstats_folder, 'labels_test.info')

    gmm = load_gmm(dataset.GMM)

    with open(tr_labels_fn, 'rb') as labels_file:
        tr_labels = pickle.load(labels_file)
    with open(te_labels_fn, 'rb') as labels_file:
        te_labels = pickle.load(labels_file)

    model = Model(model_type, gmm)
    model.compute_kernels([tr_fn], [te_fn])
    Kxx, Kyx = model.get_kernels()

    evaluation = Evaluation(dataset.DATASET, **kwargs)
    print evaluation.fit(Kxx, tr_labels).score(Kyx, te_labels)
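
A possible call site for the function above, constructing the Dataset the same way as Example #1; keyword arguments beyond model_type are forwarded to Evaluation. The specific configuration values are illustrative.

dataset = Dataset('hollywood2_clean', ip_type='dense5.track15mbh',
                  suffix='.original', nr_clusters=100)
evaluate_given_dataset(dataset, model_type='fv')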
Example #6
def evaluate_given_dataset(dataset, **kwargs):
    model_type = kwargs.get('model_type', 'fv')

    sstats_folder = dataset.SSTATS_DIR

    tr_fn = os.path.join(sstats_folder, 'train.dat')
    tr_labels_fn = os.path.join(sstats_folder, 'labels_train.info')

    te_fn = os.path.join(sstats_folder, 'test.dat')
    te_labels_fn = os.path.join(sstats_folder, 'labels_test.info')

    gmm = load_gmm(dataset.GMM)

    with open(tr_labels_fn, 'rb') as labels_file:
        tr_labels = pickle.load(labels_file)
    with open(te_labels_fn, 'rb') as labels_file:
        te_labels = pickle.load(labels_file)

    model = Model(model_type, gmm)
    model.compute_kernels([tr_fn], [te_fn])
    Kxx, Kyx = model.get_kernels()

    evaluation = Evaluation(dataset.DATASET, **kwargs)
    print evaluation.fit(Kxx, tr_labels).score(Kyx, te_labels)


def discriminative_detection_per_class(class_idx, **kwargs):
    max_nr_iter = kwargs.get('max_nr_iter', 1)
    src_cfg = kwargs.get('src_cfg')
    nr_clusters = kwargs.get('nr_clusters')
    suffix = kwargs.get('suffix')
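    # `FILE` is a module-level output-path template defined elsewhere in the
    # original source; it is formatted with (src_cfg, class_idx) below.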
    outfile = kwargs.get('outfile', FILE % (src_cfg, class_idx))
    nr_pos = kwargs.get('nr_pos', 10000)
    nr_neg = kwargs.get('nr_neg', 10000)
    agg_type = kwargs.get('agg_type', 'norm')
    use_nr_descs = kwargs.get('use_nr_descs', False)

    assert agg_type in ('norm', 'unnorm'), "Unknown aggregation type."

    dataset = Dataset(src_cfg, nr_clusters=nr_clusters, suffix=suffix)
    gmm = load_gmm(dataset.GMM)
    tr_slice_data = get_slice_data_from_file(
        dataset, 'train', class_idx, gmm, nr_pos, nr_neg)
    te_slice_data = get_slice_data_from_file(
        dataset, 'test', class_idx, gmm, nr_pos, nr_neg)
    tr_sample_labels = tr_slice_data.get_sample_labels()
    te_sample_labels = te_slice_data.get_sample_labels()
    for ii in xrange(max_nr_iter):
        print 'Iteration %d' % ii
        # Feature pooling.
        model = Model(gmm)
        if ii == 0:
            if not os.path.exists(outfile):
                print 'Aggregating statistics by the number of descriptors...'
                ss = tr_slice_data.get_aggregated_by_nr_descs()
                # Cache the aggregated statistics for subsequent runs.
                np.array(ss, dtype=np.float32).tofile(outfile)
                tr_sample_sstats = [ss] * 2
            else:
                print 'Loading cached aggregated statistics...'
                ss = np.fromfile(
                    outfile, dtype=np.float32).reshape((-1, model.D))
                tr_sample_sstats = [ss] * 2
        else:
            tr_sample_sstats = tr_slice_data.get_aggregated(
                agg_type, use_nr_descs)
        # Fisher vectors on pooled features.
        tr_kernel = model.get_tr_kernel(tr_sample_sstats)
        # Train classifier on pooled features.
        _eval = Evaluation()
        _eval = _eval.fit(tr_kernel, tr_sample_labels)
        # Update weights.
        tr_slice_data.update_scores(_eval, model)
        te_slice_data.update_scores(_eval, model)
        # Save intermediate slice data for this iteration.
        tr_slice_data.save_htlist(ii)
        te_slice_data.save_htlist(ii)
        del _eval
        del model
    # Final retraining and evaluation.
    tr_sample_sstats = tr_slice_data.get_aggregated(agg_type, use_nr_descs)
    te_sample_sstats = te_slice_data.get_aggregated(agg_type, use_nr_descs)
    model = Model(gmm)
    tr_kernel = model.get_tr_kernel(tr_sample_sstats)
    te_kernel = model.get_te_kernel(te_sample_sstats)
    _eval = Evaluation()
    _eval = _eval.fit(tr_kernel, tr_sample_labels)
    score = _eval.score(te_kernel, te_sample_labels)
    print 'Class %d score %2.3f' % (class_idx, score)
    return score
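
To close the loop, a hedged driver that runs the per-class routine over a whole dataset; the helper name, the class count, and the idea of averaging the per-class scores are assumptions, since the snippet only shows the per-class computation.

def discriminative_detection_all_classes(nr_classes, **kwargs):
    # Hypothetical helper: score every class and report the mean.
    scores = [discriminative_detection_per_class(class_idx, **kwargs)
              for class_idx in xrange(nr_classes)]
    print 'Mean score %2.3f' % (sum(scores) / len(scores))
    return scores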