示例#1
0
def analyze_cifar100(index,
                     parallel=False,
                     num_thd=2,
                     isrand=False,
                     backend='multiprocessing',
                     **kwargs):
    ''' Evaluate a stored CIFAR-100 "agg" result and save the scores as CSV.

        index: identifier substituted into the Z-file and score-file names
        parallel, num_thd, backend: passed positionally to evaluation()
        isrand: when true, the 'rand' variant of the input/output paths is
            used ('cifar100_randagg'); otherwise the plain 'cifar100_agg'
        kwargs: forwarded unchanged to evaluation()

        Returns whatever evaluation() returns; the same scores are written to
        ./output/scores/ with columns stop_at, prob, reward, V.
    '''
    if isrand:
        tag = 'rand'
    else:
        tag = ''

    # Only the test split is read here; its b'data' entry is wrapped as a
    # sparse matrix before evaluation.
    test_batch = unpickle("./data/cifar-100-python/test")
    features = csr_matrix(test_batch[b'data'])

    z_file = './output/cifar100_{}agg/cifar100_{}agg_{}_Z.json'.format(
        tag, tag, index)
    print('loading {}'.format(z_file))
    z_loaded = load_json_Z(z_file)

    scores = evaluation(features, z_loaded, parallel, num_thd, backend,
                        **kwargs)
    score_table = pd.DataFrame(scores,
                               columns=['stop_at', 'prob', 'reward', 'V'])

    out_csv = './output/scores/cifar100_{}agg_{}.csv'.format(tag, index)
    # presumably ensures the destination directory exists -- confirm in
    # create_path
    create_path(out_csv)
    score_table.to_csv(out_csv, index=False)

    return scores
示例#2
0
def analyze_amazon(index,
                   parallel=False,
                   num_thd=2,
                   backend='multiprocessing',
                   **kwargs):
    ''' Evaluate the stored Amazon tree `index` and save the scores as CSV.

        index: identifier substituted into the Z-file and score-file names
        parallel, num_thd, backend: passed positionally to evaluation()
        kwargs: forwarded unchanged to evaluation()

        Returns whatever evaluation() returns; the same scores are written to
        ./output/scores/amazon_<index>.csv with columns stop_at, prob,
        reward, V.
    '''
    features = load_npz('./data/amazon_hqs.npz')

    z_file = './data/amazon_tree_{}_Z.json'.format(index)
    z_loaded = load_json_Z(z_file)

    scores = evaluation(features, z_loaded, parallel, num_thd, backend,
                        **kwargs)

    column_names = ['stop_at', 'prob', 'reward', 'V']
    score_table = pd.DataFrame(scores, columns=column_names)

    out_csv = './output/scores/amazon_{}.csv'.format(index)
    # presumably ensures the destination directory exists -- confirm in
    # create_path
    create_path(out_csv)
    score_table.to_csv(out_csv, index=False)

    return scores
示例#3
0
def eval_clust(data_name,
               method,
               data=None,
               index=0,
               parallel=False,
               num_thd=2,
               backend='multiprocessing',
               **kwargs):
    ''' Evaluate a saved clustering result and write its scores to CSV.

        data_name: dataset name; used to build the input and output paths
        method: {'ap', 'kmeans', ... }; any name containing 'kmeans' is first
            looked up under the plain 'kmeans' file prefix
        data: matrix to evaluate; if None, the 'tfidf' entry of
            ./data/<data_name>/<data_name>_tfidf.mat is loaded instead
        index: run index used in the result and score file names
        parallel: the switch for single or multi- processing for the evaluation
        num_thd: number of threads (only valid when parallel is true)
        backend: only valid when parallel is true
        kwargs: forwarded unchanged to evaluation()

        Returns whatever evaluation() returns; the same scores are written to
        ./output/scores/test/<data_name>_<method>/ as a single 'V' column.

        Raises whatever load_cluster() raises when no matching result can be
        loaded.
    '''
    if data is None:
        mat = load_mat_data('./data/{}/{}_tfidf.mat'.format(
            data_name, data_name))
        X = mat['tfidf']
    else:
        X = data

    method_tmp = 'kmeans' if 'kmeans' in method else method

    result_dir = './output/results/{}_{}'.format(data_name, method)
    try:
        _, Z = load_cluster(
            result_dir, '{}_{}_{}'.format(data_name, method_tmp, index))
    except Exception:
        # Catch Exception rather than everything so Ctrl-C / SystemExit are
        # not swallowed by the fallback.
        if method_tmp == method:
            # The fallback name would be identical to the one that just
            # failed; retrying cannot help, so surface the original error.
            raise
        # Results may have been saved under the full method name instead of
        # the shortened 'kmeans' prefix.
        _, Z = load_cluster(
            result_dir, '{}_{}_{}'.format(data_name, method, index))

    scores = evaluation(X, Z, parallel, num_thd, backend, **kwargs)
    df = pd.DataFrame(scores, columns=['V'])

    path = ('./output/scores/test/{}_{}/stoch_{}_{}_{}.csv'.format(
        data_name, method, data_name, method, index))
    # presumably ensures the destination directory exists -- confirm in
    # create_path
    create_path(path)

    df.to_csv(path, index=False)

    return scores
示例#4
0
def analyze_amazon_large(index,
                         parallel=False,
                         num_thd=2,
                         backend='multiprocessing',
                         **kwargs):
    ''' Evaluate the stored large-scale Amazon "agg" result `index`.

        index: identifier substituted into the Z-file and score-file names
        parallel, num_thd, backend: passed positionally to evaluation()
        kwargs: forwarded unchanged to evaluation()

        Returns whatever evaluation() returns; the same scores are written to
        ./output/scores/amazon_agg_<index>.csv with columns stop_at, prob,
        reward, V.
    '''
    # The values file is tab-separated with no header row; every column
    # except the first is kept and wrapped as a sparse matrix.
    raw = pd.read_csv('./data/amazon_hqs_large_scale_values.csv',
                      sep='\t',
                      header=None)
    features = csr_matrix(raw.iloc[:, 1:].values)
    # Drop the reference to the raw frame before the remaining work.
    del raw

    z_file = './output/amazon-large_agg/amazon-large_agg_{}_Z.json'.format(
        index)
    z_loaded = load_json_Z(z_file)

    scores = evaluation(features, z_loaded, parallel, num_thd, backend,
                        **kwargs)
    score_table = pd.DataFrame(scores,
                               columns=['stop_at', 'prob', 'reward', 'V'])

    out_csv = './output/scores/amazon_agg_{}.csv'.format(index)
    # presumably ensures the destination directory exists -- confirm in
    # create_path
    create_path(out_csv)
    score_table.to_csv(out_csv, index=False)

    return scores