def analyze_cifar100(index, parallel=False, num_thd=2, isrand=False,
                     backend='multiprocessing', **kwargs):
    """Score a precomputed agglomerative clustering of the CIFAR-100 test set.

    Loads the raw CIFAR-100 test batch, converts it to a sparse matrix,
    reads the linkage structure ``Z`` produced by a previous clustering run,
    evaluates it, and writes the per-stop scores to a CSV file.

    Parameters
    ----------
    index : identifier of the clustering run; selects which ``*_Z.json``
        file to load and names the output CSV.
    parallel : switch between single- and multi-process evaluation.
    num_thd : number of workers (only used when ``parallel`` is true).
    isrand : if true, use the 'rand' (randomized) clustering outputs.
    backend : joblib-style backend name (only used when ``parallel`` is true).
    **kwargs : forwarded to ``evaluation``.

    Returns
    -------
    The raw ``scores`` list returned by ``evaluation``; also written to
    ``./output/scores/cifar100_{rand}agg_{index}.csv`` as a side effect.
    """
    test = unpickle("./data/cifar-100-python/test")
    # Sparse representation keeps memory down for the downstream evaluation.
    X = csr_matrix(test[b'data'])
    rd_str = 'rand' if isrand else ''
    fn = ('./output/cifar100_{}agg/cifar100_{}agg_{}_Z.json'.format(
        rd_str, rd_str, index))
    print('loading {}'.format(fn))
    Z = load_json_Z(fn)
    scores = evaluation(X, Z, parallel, num_thd, backend, **kwargs)
    df = pd.DataFrame(scores, columns=['stop_at', 'prob', 'reward', 'V'])
    path = './output/scores/cifar100_{}agg_{}.csv'.format(rd_str, index)
    create_path(path)  # ensure the output directory exists before writing
    df.to_csv(path, index=False)
    return scores
def analyze_amazon(index, parallel=False, num_thd=2,
                   backend='multiprocessing', **kwargs):
    """Evaluate a stored Amazon clustering tree and dump its scores to CSV.

    Reads the sparse feature matrix and the tree linkage for run ``index``,
    runs ``evaluation``, saves the results under ``./output/scores/``, and
    returns the raw score list.
    """
    features = load_npz('./data/amazon_hqs.npz')
    linkage = load_json_Z('./data/amazon_tree_{}_Z.json'.format(index))

    scores = evaluation(features, linkage, parallel, num_thd, backend,
                        **kwargs)

    out_path = './output/scores/amazon_{}.csv'.format(index)
    create_path(out_path)
    pd.DataFrame(
        scores, columns=['stop_at', 'prob', 'reward', 'V']
    ).to_csv(out_path, index=False)
    return scores
def eval_clust(data_name, method, data=None, index=0, parallel=False,
               num_thd=2, backend='multiprocessing', **kwargs):
    '''
    Evaluate a stored clustering result for a named dataset and write the
    scores to ``./output/scores/test/...``.

    method: {'ap', 'kmeans', ... }
    data: if data is None, we need to process it with the TFIDF score
    parallel: the switch for single or multi- processing for the evaluation
    num_thd: number of threads (only valid when parallel is true)
    backend: only valid when parallel is true

    Returns the raw ``scores`` list from ``evaluation``.
    '''
    if data is None:
        data = load_mat_data('./data/{}/{}_tfidf.mat'.format(
            data_name, data_name))
        X = data['tfidf']
    else:
        X = data

    # Variants such as 'kmeans++' are stored under the base 'kmeans' name,
    # so strip the suffix for the first lookup attempt.
    if 'kmeans' in method:
        method_tmp = 'kmeans'
    else:
        method_tmp = method

    try:
        tree, Z = load_cluster(
            './output/results/{}_{}'.format(data_name, method),
            '{}_{}_{}'.format(data_name, method_tmp, index))
    # Was a bare `except:`, which also swallowed KeyboardInterrupt/SystemExit;
    # narrowed to Exception. NOTE(review): when method_tmp == method this
    # retry repeats the identical call — presumably only the kmeans-variant
    # case is expected to fall through here.
    except Exception:
        tree, Z = load_cluster(
            './output/results/{}_{}'.format(data_name, method),
            '{}_{}_{}'.format(data_name, method, index))

    scores = evaluation(X, Z, parallel, num_thd, backend, **kwargs)
    df = pd.DataFrame(scores, columns=['V'])
    path = ('./output/scores/test/{}_{}/stoch_{}_{}_{}.csv'.format(
        data_name, method, data_name, method, index))
    create_path(path)  # ensure the output directory exists before writing
    df.to_csv(path, index=False)
    return scores
def analyze_amazon_large(index, parallel=False, num_thd=2,
                         backend='multiprocessing', **kwargs):
    """Evaluate the large-scale Amazon agglomerative clustering run ``index``.

    Builds a sparse matrix from the tab-separated values file (column 0 is
    skipped), loads the matching linkage JSON, scores it, writes the scores
    to CSV, and returns them.
    """
    raw = pd.read_csv('./data/amazon_hqs_large_scale_values.csv',
                      sep='\t', header=None)
    # Column 0 is an identifier; the remaining columns are feature values.
    features = csr_matrix(raw.iloc[:, 1:].values)
    del raw  # free the dense frame before evaluation

    linkage = load_json_Z(
        './output/amazon-large_agg/amazon-large_agg_{}_Z.json'.format(index))
    scores = evaluation(features, linkage, parallel, num_thd, backend,
                        **kwargs)

    out_path = './output/scores/amazon_agg_{}.csv'.format(index)
    create_path(out_path)
    pd.DataFrame(
        scores, columns=['stop_at', 'prob', 'reward', 'V']
    ).to_csv(out_path, index=False)
    return scores