示例#1
0
        (all_cat_lbls['train'], all_attr_lbls['train']))
    # Test label matrix: the category-label columns followed by the
    # attribute-label columns, joined side by side with np.hstack.
    bin_labels_test = np.hstack((all_cat_lbls['test'], all_attr_lbls['test']))
    print 'num train', bin_labels_train.shape[0]
    print 'num test', bin_labels_test.shape[0]
    # Results layout: res[ela_type][numq] -> list of item[2] values, one per
    # evaluated test row.
    res = {}

    # NOTE(review): this is a set literal, so the order in which the ELA
    # types are run (and printed) is not guaranteed.
    ela_types = {'rand', 'pop', 'backoff', 'dist'}
    for et in ela_types:
        print '***** %s *****' % et
        res[et] = {}
        # Start each ELA type with an empty dtree; it is then carried across
        # the numq settings below.
        dtree = {}
        for numq in [10, 20, 30, 40, 50, 80, 100]:

            # Build a graph from the training labels, limited by the number
            # of queries (ela_limit_type='numq', ela_limit=numq).
            sgraph = cooccurrence.SGraph(train=bin_labels_train,
                                         dtree=dtree,
                                         ela_type=et,
                                         ela_limit_type='numq',
                                         ela_limit=numq)

            res[et][numq] = []

            # Only the first 100 test rows are evaluated.
            for ind, row in enumerate(bin_labels_test[:100]):
                # The first num_cat column indices are passed as known;
                # presumably these are the category columns, since they come
                # first in the hstack above -- TODO confirm num_cat.
                item = sgraph.test(row, known_inds=range(num_cat))
                # item[2] is the score printed as "rec" below (presumably
                # recall -- confirm against SGraph.test).
                res[et][numq].append(item[2])
                print 'numq = %d: rec %d = %.2f' % (numq, ind, item[2])

            # Hand this graph's dtree to the SGraph built for the next numq.
            # NOTE(review): looks like a cache reused across runs -- confirm.
            dtree = sgraph.dtree

    # Need to go back through and look for rare attributes
    # TODO: save ELA labeled instances for each major marker
    # for M 'rarest' attributes
示例#2
0
    # Fixed split: the first 7000 rows are used for training and rows
    # 7000..7999 for testing. The trailing [:] is a full slice and selects
    # nothing further.
    bin_labels_train = bin_labels[:7000][:]
    bin_labels_test = bin_labels[7000:8000][:]

    # The commented-out lines below are an alternative setup: start from an
    # empty result dict and run the other ELA types.
    # res = {}
    # ela_types = ['rand', 'pop', 'dist'] # 'backoff',
    # Load previously saved results and (re)run only the 'backoff' type;
    # any existing res['backoff'] entry is replaced below.
    res = joblib.load('data/sun_attr_rec_benchmark_mle_et_threshold.jbl')
    ela_types = ['backoff']
    for et in ela_types:
        print '***** %s *****' % et
        res[et] = {}
        # Thresholds from 0.005 up to (but excluding) 0.055 in steps of
        # 0.005.
        # NOTE(review): the values are floats produced by np.arange and are
        # used as dict keys below; they may not compare equal to the decimal
        # literals -- verify before looking results up by key.
        for thresh in np.arange(0.005, 0.055, 0.005):
            # A fresh, empty dtree is used for every threshold.
            dtree = {}
            print 'threshold : ' + str(thresh)
            # Build a graph from the training labels, limited by threshold
            # (ela_limit_type='threshold', ela_limit=thresh).
            sgraph = cooccurrence.SGraph(train=bin_labels_train,
                                         dtree=dtree,
                                         ela_type=et,
                                         ela_limit_type='threshold',
                                         ela_limit=thresh)

            # Per-threshold results: 'rec' holds item[2] and 'numq' holds
            # len(item[3]) for each evaluated test row.
            res[et][thresh] = {}
            res[et][thresh]['rec'] = []
            res[et][thresh]['numq'] = []

            # Only the first 100 test rows are evaluated.
            for ind, row in enumerate(bin_labels_test[:100][:]):
                item = sgraph.test(row)
                res[et][thresh]['rec'].append(item[2])
                # len(item[3]) is recorded as the number of queries
                # (presumably item[3] lists the queries made -- confirm).
                res[et][thresh]['numq'].append(len(item[3]))
                print 'thresh = %.3f: rec %d = %.2f, numq = %d' % (
                    thresh, ind, item[2], len(item[3]))

            # This value is overwritten by `dtree = {}` at the top of the
            # next iteration, so only the last threshold's dtree is still
            # bound after the loop.
            dtree = sgraph.dtree