def evaluate():
    """Fuse the kernels of several feature combinations and print the score.

    Every command-line argument (``sys.argv[1:]``) names an entry of the
    module-level ``combinations`` mapping, optionally prefixed by a weight in
    the form ``<factor>*<name>`` (the factor defaults to 1.0).  For each entry
    the 'train_balanced' and 'test_balanced' splits are loaded with
    ``get_data``, turned into kernels with ``data_to_kernels``, scaled by the
    factor and summed.  The summed kernels are then fitted and scored with
    ``fisher_vectors.evaluation.trecvid12_parallel`` and the score is printed.

    The first combination defines the reference video ordering: descriptors
    of later combinations are passed through ``remap_descriptors`` with the
    reference video names, and the labels of the first combination are the
    ones used for fitting and scoring.

    Raises:
        ValueError: if no combination is given on the command line.
        KeyError: if a name is not a key of ``combinations``.
    """
    selection = sys.argv[1:]
    if not selection:
        raise ValueError(
            "no feature combination given; expected arguments of the form "
            "NAME or FACTOR*NAME")

    tr_kernel = te_kernel = None
    ref_tr_vidnames = ref_te_vidnames = None
    ref_tr_labels = ref_te_labels = None

    for cname in selection:
        factor = 1.0
        if '*' in cname:
            factor, cname = cname.split('*')
            factor = float(factor)
        feature, params = combinations[cname]
        print("load feature %s factor %s" % (cname, factor))

        tr_data, tr_labels, tr_vidnames = get_data(
            feature, 'train_balanced', **params)
        te_data, te_labels, te_vidnames = get_data(
            feature, 'test_balanced', **params)
        print("compute kernels train %d*%d test %d*%d" % (
            tr_data.shape + te_data.shape))

        # `tr_kernel is None` marks the first combination.  Testing the kernel
        # (rather than comparing the video names with `== None`) avoids an
        # element-wise comparison when the names are held in a NumPy array.
        is_first = tr_kernel is None

        if not is_first:
            print("remapping names")
            te_data = remap_descriptors(te_data, te_vidnames, ref_te_vidnames)
            tr_data = remap_descriptors(tr_data, tr_vidnames, ref_tr_vidnames)

        Kxx, Kyx = data_to_kernels(tr_data, te_data)
        Kxx *= factor
        Kyx *= factor

        if is_first:
            tr_kernel = Kxx
            te_kernel = Kyx
            ref_te_vidnames = te_vidnames
            ref_tr_vidnames = tr_vidnames
            # The summed kernels follow the ordering of the first combination,
            # so its labels are the ones that line up with the kernel rows.
            ref_tr_labels = tr_labels
            ref_te_labels = te_labels
        else:
            tr_kernel += Kxx
            te_kernel += Kyx

    # Imported lazily, and under a name that does not shadow builtin `eval`.
    from fisher_vectors.evaluation import trecvid12_parallel as evaluation
    fit_out = evaluation.fit(tr_kernel, ref_tr_labels)
    print(evaluation.score(te_kernel, ref_te_labels, fit_out))
def vary_nr_negatives():
    """Score the classifier while varying the amount of null-class training data.

    Loads the 'train' and 'test' splits of the hard-coded feature ('mbh') with
    ``get_data``.  For each of ``nr_repeats`` repetitions and each proportion,
    a subset of training indices is chosen with ``subsample_null_class``
    (all samples are kept when the proportion is 1.0), shuffled with a seed
    equal to the repetition number, and used to build the kernels that are
    fitted and scored with ``fisher_vectors.evaluation.trecvid12_parallel``.

    Each score is printed and appended to the results file as a line
    ``<proportion> <number of null-class training samples> <score>``; the
    block of results is preceded by a header line with the feature name and
    its parameters.
    """
    # Imported once instead of on every inner-loop iteration, and under a
    # name that does not shadow the builtin `eval`.
    from fisher_vectors.evaluation import trecvid12_parallel as evaluation

    null_class_idx = 0
    feature = 'mbh'
    params = {
        'dummy': {},
        'mbh': {'suffix': '_morenull'},
        'sift': {'subsample': 10, 'nr_clusters': 64, 'color': 0},
    }
    proportions = (0.02, 0.04, 0.08, 0.1, 0.2, 0.4, 0.8, 1.)
    nr_repeats = 5
    outfilename = '/home/lear/oneata/data/trecvid12/results/tmp.txt'

    tr_data, tr_labels, _ = get_data(feature, 'train', **params[feature])
    te_data, te_labels, _ = get_data(feature, 'test', **params[feature])

    with open(outfilename, 'a') as ff:
        ff.write('%s %s\n' % (feature, params[feature]))
        for ii in range(nr_repeats):
            for proportion in proportions:
                if proportion < 1.0:
                    idxs = subsample_null_class(tr_labels, proportion, ii)
                else:
                    idxs = np.arange(len(tr_labels))
                # Seed with the repetition number so each run is repeatable.
                random.seed(ii)
                random.shuffle(idxs)
                _tr_data, _tr_labels = tr_data[idxs], tr_labels[idxs]

                # Build the kernels from the subsampled training data so the
                # train kernel matches `_tr_labels` in size and order.
                tr_kernel, te_kernel = data_to_kernels(_tr_data, te_data)

                fit_out = evaluation.fit(tr_kernel, _tr_labels)
                score = evaluation.score(te_kernel, te_labels, fit_out)
                print(score)

                # Selected samples minus the non-null ones: the number of
                # null-class samples actually used for training.
                nr_non_null = len(_tr_labels[_tr_labels != null_class_idx])
                nr_null = len(idxs) - nr_non_null
                ff.write('%1.2f %d %2.3f \n' % (proportion, nr_null, score))