def setup(self):
    """Prepare the Hollywood2 evaluation fixture.

    Loads the dataset configuration, resolves the sufficient-statistics
    file paths, loads the GMM and the train/test label pickles into
    ``self.cx`` / ``self.cy``.
    """
    # These scores are achieved for the following evaluation parameters:
    # class_weights='auto', C from 3 ** -2 .. 3 ** 8, and one shuffle
    # split with test size of 0.25.
    self.expected_scores = {'fv': 55.226, 'fv_sfv': 56.718, 'bow': 42.896}
    src_cfg = 'hollywood2_clean'
    nr_clusters = 100
    self.dataset = Dataset(
        src_cfg, ip_type='dense5.track15mbh', suffix='.original',
        nr_clusters=nr_clusters)
    sstats_folder = self.dataset.SSTATS_DIR
    gmm_fn = self.dataset.GMM
    self.tr_fn = os.path.join(sstats_folder, 'train_1_1_1_0.dat')
    self.sp_tr_fn = os.path.join(sstats_folder, 'spatial_train_1_1_1_0.dat')
    self.tr_labels_fn = os.path.join(sstats_folder, 'labels_train.info')
    self.te_fn = os.path.join(sstats_folder, 'test_1_1_1_0.dat')
    self.sp_te_fn = os.path.join(sstats_folder, 'spatial_test_1_1_1_0.dat')
    self.te_labels_fn = os.path.join(sstats_folder, 'labels_test.info')
    self.gmm = load_gmm(gmm_fn)
    # Open the pickles in binary mode -- pickle data is a byte stream and
    # text mode can corrupt it on platforms with newline translation --
    # and use `with` so the handles are closed (the original leaked them).
    with open(self.tr_labels_fn, 'rb') as ff:
        tr_labels = pickle.load(ff)
    with open(self.te_labels_fn, 'rb') as ff:
        te_labels = pickle.load(ff)
    self.cx = tr_labels
    self.cy = te_labels
def setup(self):
    """Set up the Hollywood2 fixture: dataset handle, statistics paths,
    GMM, and the train/test label arrays (``self.cx`` / ``self.cy``)."""
    # These scores are achieved for the following evaluation parameters:
    # class_weights='auto', C from 3 ** -2 .. 3 ** 8, and one shuffle
    # split with test size of 0.25.
    self.expected_scores = {'fv': 55.226, 'fv_sfv': 56.718, 'bow': 42.896}
    self.dataset = Dataset(
        'hollywood2_clean', ip_type='dense5.track15mbh',
        suffix='.original', nr_clusters=100)
    folder = self.dataset.SSTATS_DIR
    # Per-split sufficient statistics, spatial statistics and labels.
    self.tr_fn = os.path.join(folder, 'train_1_1_1_0.dat')
    self.sp_tr_fn = os.path.join(folder, 'spatial_train_1_1_1_0.dat')
    self.tr_labels_fn = os.path.join(folder, 'labels_train.info')
    self.te_fn = os.path.join(folder, 'test_1_1_1_0.dat')
    self.sp_te_fn = os.path.join(folder, 'spatial_test_1_1_1_0.dat')
    self.te_labels_fn = os.path.join(folder, 'labels_test.info')
    self.gmm = load_gmm(self.dataset.GMM)
    self.cx = pickle.load(open(self.tr_labels_fn, 'r'))
    self.cy = pickle.load(open(self.te_labels_fn, 'r'))
def master(src_cfg, suffix_in, suffix_out, K, N, nr_processes, double_norm):
    """Dispatch merging / double-normalization of sufficient statistics.

    Parameters
    ----------
    src_cfg: str
        Dataset configuration name.
    suffix_in, suffix_out: str
        Suffixes appended to the dataset's FEAT_DIR for input and output.
    K: int
        Number of GMM clusters.
    N: passed through to the worker as-is.
    nr_processes: int
        Number of parallel worker processes; <= 1 runs in-process.
    double_norm: bool
        If true, run `double_normalization` (needs the GMM); otherwise `merge`.
    """
    D = 64  # descriptor dimensionality assumed by the statistics layout.
    dataset = Dataset(src_cfg, nr_clusters=K)
    samples = [
        str(sample)
        for sample in dataset.get_data('train')[0] + dataset.get_data('test')[0]]
    if double_norm:
        worker = double_normalization
        suffix = '.double_norm'
        gmm = load_gmm(
            os.path.join(dataset.FEAT_DIR + suffix_in, 'gmm', 'gmm_%d' % K))
    else:
        worker = merge
        suffix = ''
        gmm = None
    path_in = os.path.join(
        dataset.FEAT_DIR + suffix_in,
        'statistics_k_%d' % dataset.VOC_SIZE, 'stats.tmp')
    path_out = os.path.join(
        dataset.FEAT_DIR + suffix_out,
        'statistics_k_%d' % dataset.VOC_SIZE, 'stats.tmp' + suffix)
    sstats_in = SstatsMap(path_in)
    sstats_out = SstatsMap(path_out)
    len_sstats = dataset.VOC_SIZE + 2 * D * dataset.VOC_SIZE
    kwargs = {
        'N': N,
        'sstats_in': sstats_in,
        'sstats_out': sstats_out,
        'len_sstats': len_sstats,
        'gmm': gmm}
    if nr_processes > 1:
        # Ceil division. The original `len(samples) / nr_processes + 1`
        # over-sized the chunks whenever the split was exact, so trailing
        # processes were started on empty slices.
        nr_samples_per_process = (
            (len(samples) + nr_processes - 1) // nr_processes)
        processes = []
        for ii in xrange(nr_processes):
            chunk = samples[
                ii * nr_samples_per_process:
                (ii + 1) * nr_samples_per_process]
            if not chunk:
                break  # no work left for the remaining slots.
            proc = mp.Process(target=worker, args=(chunk, ), kwargs=kwargs)
            proc.start()
            processes.append(proc)
        # Join so that callers can rely on the output being complete when
        # master() returns (the original fired-and-forgot the workers).
        for proc in processes:
            proc.join()
    else:
        worker(samples, **kwargs)
def master(src_cfg, suffix_in, suffix_out, K, N, nr_processes, double_norm):
    """Fan out merging / double-normalization of the per-sample sufficient
    statistics over `nr_processes` worker processes (or run inline)."""
    D = 64
    dataset = Dataset(src_cfg, nr_clusters=K)
    all_data = dataset.get_data('train')[0] + dataset.get_data('test')[0]
    samples = [str(item) for item in all_data]
    if double_norm:
        gmm_path = os.path.join(
            dataset.FEAT_DIR + suffix_in, 'gmm', 'gmm_%d' % K)
        worker, suffix, gmm = double_normalization, '.double_norm', load_gmm(gmm_path)
    else:
        worker, suffix, gmm = merge, '', None
    stats_dir = 'statistics_k_%d' % dataset.VOC_SIZE
    sstats_in = SstatsMap(
        os.path.join(dataset.FEAT_DIR + suffix_in, stats_dir, 'stats.tmp'))
    sstats_out = SstatsMap(
        os.path.join(dataset.FEAT_DIR + suffix_out, stats_dir,
                     'stats.tmp' + suffix))
    kwargs = {
        'N': N,
        'sstats_in': sstats_in,
        'sstats_out': sstats_out,
        'len_sstats': dataset.VOC_SIZE + 2 * D * dataset.VOC_SIZE,
        'gmm': gmm}
    if nr_processes <= 1:
        worker(samples, **kwargs)
        return
    chunk_size = len(samples) / nr_processes + 1
    for idx in xrange(nr_processes):
        part = samples[idx * chunk_size: (idx + 1) * chunk_size]
        mp.Process(target=worker, args=(part, ), kwargs=kwargs).start()
def evaluate_given_dataset(dataset, **kwargs):
    """Train and score a kernel model on a dataset's precomputed statistics.

    Parameters
    ----------
    dataset:
        Object providing SSTATS_DIR, GMM and DATASET attributes.
    model_type: str, optional (taken from kwargs)
        Model type passed to Model; defaults to 'fv'.

    Remaining kwargs are forwarded to Evaluation. Prints the score;
    returns nothing.
    """
    model_type = kwargs.get('model_type', 'fv')
    sstats_folder = dataset.SSTATS_DIR
    tr_fn = os.path.join(sstats_folder, 'train.dat')
    tr_labels_fn = os.path.join(sstats_folder, 'labels_train.info')
    te_fn = os.path.join(sstats_folder, 'test.dat')
    te_labels_fn = os.path.join(sstats_folder, 'labels_test.info')
    gmm = load_gmm(dataset.GMM)
    # Read the pickles in binary mode and close the handles deterministically;
    # the original used text mode and leaked the file objects.
    with open(tr_labels_fn, 'rb') as ff:
        tr_labels = pickle.load(ff)
    with open(te_labels_fn, 'rb') as ff:
        te_labels = pickle.load(ff)
    model = Model(model_type, gmm)
    model.compute_kernels([tr_fn], [te_fn])
    Kxx, Kyx = model.get_kernels()
    evaluation = Evaluation(dataset.DATASET, **kwargs)
    # Single-argument parenthesized print is identical under Python 2 and 3.
    print(evaluation.fit(Kxx, tr_labels).score(Kyx, te_labels))
def evaluate_given_dataset(dataset, **kwargs): model_type = kwargs.get('model_type', 'fv') sstats_folder = dataset.SSTATS_DIR tr_fn = os.path.join(sstats_folder, 'train.dat') tr_labels_fn = os.path.join(sstats_folder, 'labels_train.info') te_fn = os.path.join(sstats_folder, 'test.dat') te_labels_fn = os.path.join(sstats_folder, 'labels_test.info') gmm = load_gmm(dataset.GMM) tr_labels = pickle.load(open(tr_labels_fn, 'r')) te_labels = pickle.load(open(te_labels_fn, 'r')) model = Model(model_type, gmm) model.compute_kernels([tr_fn], [te_fn]) Kxx, Kyx = model.get_kernels() evaluation = Evaluation(dataset.DATASET, **kwargs) print evaluation.fit(Kxx, tr_labels).score(Kyx, te_labels)
def discriminative_detection_per_class(class_idx, **kwargs):
    """Iteratively train a discriminative detector for one class and return
    its final test score.

    The loop alternates between (a) pooling slice statistics into per-sample
    statistics, (b) fitting a kernel classifier on them, and (c) using that
    classifier to re-weight the slices (`update_scores`). After `max_nr_iter`
    rounds it retrains once more and evaluates on the test split.

    Keyword arguments (all read via kwargs.get):
      max_nr_iter:  number of re-weighting iterations (default 1).
      src_cfg, nr_clusters, suffix:  Dataset construction parameters.
      outfile:  cache path for the first-iteration aggregated statistics
                (default FILE % (src_cfg, class_idx)).
      nr_pos, nr_neg:  number of positive/negative slices to load (10000).
      agg_type:  'norm' or 'unnorm' aggregation.
      use_nr_descs:  whether aggregation weighs by descriptor counts
                     (semantics live in get_aggregated -- not visible here).

    Returns the float score printed for the class.
    """
    max_nr_iter = kwargs.get('max_nr_iter', 1)
    #dataset = kwargs.get('dataset', Dataset(
    #    'trecvid11_small', nr_clusters=128, suffix='.small.per_slice.delta_240'))
    #    'trecvid11_small', nr_clusters=128, suffix='.small.per_slice'))
    src_cfg = kwargs.get('src_cfg')
    nr_clusters = kwargs.get('nr_clusters')
    suffix = kwargs.get('suffix')
    # Cache file for this class's aggregated statistics (first iteration only).
    outfile = kwargs.get('outfile', FILE % (src_cfg, class_idx))
    nr_pos = kwargs.get('nr_pos', 10000)
    nr_neg = kwargs.get('nr_neg', 10000)
    agg_type = kwargs.get('agg_type', 'norm')
    use_nr_descs = kwargs.get('use_nr_descs', False)
    assert agg_type in ('norm', 'unnorm'), "Unknown aggregation type."
    dataset = Dataset(src_cfg, nr_clusters=nr_clusters, suffix=suffix)
    gmm = load_gmm(dataset.GMM)
    # Slice-level sufficient statistics for both splits.
    tr_slice_data = get_slice_data_from_file(
        dataset, 'train', class_idx, gmm, nr_pos, nr_neg)
    te_slice_data = get_slice_data_from_file(
        dataset, 'test', class_idx, gmm, nr_pos, nr_neg)
    tr_sample_labels = tr_slice_data.get_sample_labels()
    te_sample_labels = te_slice_data.get_sample_labels()
    for ii in xrange(max_nr_iter):
        print 'Iteration %d' % ii
        # Feature pooling.
        model = Model(gmm)
        if ii == 0:
            # First iteration: aggregate by descriptor counts, cached on disk.
            if not os.path.exists(outfile):
                print 'Aggregating statistics by the number of descriptors...'
                ss = tr_slice_data.get_aggregated_by_nr_descs()
                np.array(ss, dtype=np.float32).tofile(outfile)
                #tr_sample_sstats = [ss, ss]
                # NOTE(review): the duplicated statistics presumably stand for
                # the two inputs get_tr_kernel expects -- confirm against Model.
                tr_sample_sstats = [ss] * 2
            else:
                print 'Loaded aggregated statistics...'
                # Cache results.
                ss = np.fromfile(
                    outfile, dtype=np.float32).reshape((-1, model.D))
                #tr_sample_sstats = [ss, ss]
                tr_sample_sstats = [ss] * 2
        else:
            # Later iterations: aggregate with the slice weights updated below.
            tr_sample_sstats = tr_slice_data.get_aggregated(
                agg_type, use_nr_descs)
        # Fisher vectors on pooled features.
        tr_kernel = model.get_tr_kernel(tr_sample_sstats)
        # Train classifier on pooled features.
        _eval = Evaluation()
        _eval = _eval.fit(tr_kernel, tr_sample_labels)
        # Update weights: re-score the slices with the freshly fit classifier.
        tr_slice_data.update_scores(_eval, model)
        te_slice_data.update_scores(_eval, model)
        # TODO Save data.
        tr_slice_data.save_htlist(ii)
        te_slice_data.save_htlist(ii)
        # Release the per-iteration classifier and model before the next round.
        del _eval
        del model
    # Final retraining and evaluation.
    tr_sample_sstats = tr_slice_data.get_aggregated(agg_type, use_nr_descs)
    te_sample_sstats = te_slice_data.get_aggregated(agg_type, use_nr_descs)
    model = Model(gmm)
    tr_kernel = model.get_tr_kernel(tr_sample_sstats)
    te_kernel = model.get_te_kernel(te_sample_sstats)
    _eval = Evaluation()
    _eval = _eval.fit(tr_kernel, tr_sample_labels)
    score = _eval.score(te_kernel, te_sample_labels)
    print 'Class %d score %2.3f' % (class_idx, score)
    return score