(all_cat_lbls['train'], all_attr_lbls['train'])) bin_labels_test = np.hstack((all_cat_lbls['test'], all_attr_lbls['test'])) print 'num train', bin_labels_train.shape[0] print 'num test', bin_labels_test.shape[0] res = {} ela_types = {'rand', 'pop', 'backoff', 'dist'} for et in ela_types: print '***** %s *****' % et res[et] = {} dtree = {} for numq in [10, 20, 30, 40, 50, 80, 100]: sgraph = cooccurrence.SGraph(train=bin_labels_train, dtree=dtree, ela_type=et, ela_limit_type='numq', ela_limit=numq) res[et][numq] = [] for ind, row in enumerate(bin_labels_test[:100]): item = sgraph.test(row, known_inds=range(num_cat)) res[et][numq].append(item[2]) print 'numq = %d: rec %d = %.2f' % (numq, ind, item[2]) dtree = sgraph.dtree # Need to go back through and look for rare attributes # TODO: save ELA labeled instances for each major marker # for M 'rarest' attributes
# Fixed split of bin_labels: rows [0, 7000) for training, rows [7000, 8000)
# for testing.
bin_labels_train = bin_labels[:7000]
bin_labels_test = bin_labels[7000:8000]

# Disabled alternative configuration (fresh results, other strategies):
# res = {}
# ela_types = ['rand', 'pop', 'dist']  # 'backoff',
# Active configuration: reload the previously saved results and (re)run only
# the 'backoff' strategy; res['backoff'] is overwritten below.
res = joblib.load('data/sun_attr_rec_benchmark_mle_et_threshold.jbl')
ela_types = ['backoff']

# Layout: res[ela_type][threshold] -> {'rec': [...], 'numq': [...]}, with one
# list entry per evaluated test row.
for et in ela_types:
    print('***** %s *****' % et)
    res[et] = {}
    for thresh in np.arange(0.005, 0.055, 0.005):
        # A fresh, empty dtree is handed to every threshold's graph.
        dtree = {}
        print('threshold : ' + str(thresh))
        sgraph = cooccurrence.SGraph(train=bin_labels_train,
                                     dtree=dtree,
                                     ela_type=et,
                                     ela_limit_type='threshold',
                                     ela_limit=thresh)
        rec_values = []
        question_counts = []
        res[et][thresh] = {'rec': rec_values, 'numq': question_counts}
        # Only the first 100 test rows are evaluated.
        for ind, row in enumerate(bin_labels_test[:100]):
            item = sgraph.test(row)
            # item[2] is the value logged as "rec"; the length of item[3] is
            # logged and stored as the number of questions ("numq").
            num_asked = len(item[3])
            rec_values.append(item[2])
            question_counts.append(num_asked)
            print('thresh = %.3f: rec %d = %.2f, numq = %d' % (
                thresh, ind, item[2], num_asked))
        # NOTE(review): the original indentation of this statement was lost;
        # it is placed after the per-row loop. dtree is reset at the top of
        # the next threshold iteration, so only the final value survives.
        dtree = sgraph.dtree