def init():
    """Parse command-line arguments and initialize module-level state for the
    ns-attack simulation.

    Side effects: assigns the globals below from the parsed arguments and from
    the ratings files loaded off disk. No value is returned.
    """
    # NOTE(review): sim_threshold is declared global here but never assigned in
    # this function — presumably set elsewhere; confirm before removing.
    global weight_mode, sim_threshold, sim_mode, attack_ratings, \
        original_ratings, user_size, item_size, translator, correct, total, eccen
    parser = argparse.ArgumentParser(
        # Typo fixed: "statitical" -> "statistical".
        description='a ns attack simulation and statistical analysis')
    parser.add_argument('-r', '--ratings', required=True)
    parser.add_argument('-t', '--total', required=True, type=int)
    parser.add_argument('-c', '--correct', required=True, type=int)
    parser.add_argument('-e', '--eccen', type=float)
    # BUG FIX: the short option was written '-m,' (stray trailing comma), so
    # the intended '-m' flag was never recognized by argparse.
    parser.add_argument('-m', '--mode', choices=mode_choices, default='exp')
    parser.add_argument('-w', '--weight', choices=['equal', 'less'], default='less')
    args = parser.parse_args()
    weight_mode = args.weight
    sim_mode = args.mode
    attack_ratings = load(args.ratings)
    # Derive the clean (pre-attack) ratings matrix from the attack file's name.
    dataset = extract_dataset_from_filename(args.ratings)
    original_ratings = load(get_ratings_name_from_dataset(dataset))
    user_size = original_ratings.shape[0]
    item_size = original_ratings.shape[1]
    translator = get_id_translator(args.ratings)
    correct = args.correct
    total = args.total
    eccen = args.eccen
def _rmse_over_tops(test_set, original_ratings, web, temp_tops, web_name, adapter_kind):
    """Run one RMSE sweep over the candidate `top` values.

    Logs each evaluation, then returns the list of RMSE scores in the same
    order as `temp_tops`. (Extracted: this loop was duplicated verbatim in the
    two "all" branches of main().)
    """
    y = []
    for temp_top in temp_tops:
        logging.info({'web': web_name, 'top': temp_top, 'adapter': adapter_kind})
        count = RMSE(test_set, original_ratings, web, temp_top, adapter_kind)
        y.append(count['RMSE'])
    return y


def main():
    """RMSE evaluation entry point.

    Three modes, selected by the -w/-a arguments:
      * -w all          : sweep every web file, plot RMSE vs. top per web.
      * -a all          : sweep every adapter kind for one web.
      * neither is 'all': single RMSE evaluation, logged only.
    Passing 'all' for both raises ValueError.
    """
    parser = argparse.ArgumentParser(description='RMSE test for a certain dataset')
    parser.add_argument('-d', '--dataset', required=True, choices=dataset_choices)
    parser.add_argument('-w', '--web', required=True)
    parser.add_argument('-t', '--top', type=int)
    parser.add_argument('-a', '--adapter')
    args = parser.parse_args()
    data_set = args.dataset
    web_name = args.web
    top = args.top
    adapter_kind = args.adapter
    temp_tops = [5, 10, 20, 40, 60, 70, 80, 90, 100]
    original_ratings = load(get_ratings_name_from_dataset(data_set))
    test_set = np.loadtxt(directory + data_set + '.test', delimiter='\t')
    if web_name == 'all' and adapter_kind != 'all':
        web_names = get_all_web_files()
        plt.figure()
        plt.xlabel('top')
        plt.ylabel('RMSE')
        # NOTE(review): 2 is an implicit "worse than anything plausible" RMSE
        # sentinel; if every score is >= 2 the None fields survive into the log.
        best = {'top': None, 'RMSE': 2, 'web': None}
        for web_name in web_names:
            web = load(web_name)
            y = _rmse_over_tops(test_set, original_ratings, web, temp_tops,
                                web_name, adapter_kind)
            if min(y) < best['RMSE']:
                best = {'web': web_name, 'RMSE': min(y),
                        'top': temp_tops[y.index(min(y))]}
            logging.info('%s:%s', web_name, y)
            plt.plot(temp_tops, y, marker='*', label=web_name)
        plt.legend()
        plt.savefig('top-RMSE-webs.jpg')
        logging.info('best:%s', best)
    elif web_name != 'all' and adapter_kind == 'all':
        adapter_kinds = ['int', 'round', 'customize', 'int1']
        plt.figure()
        plt.xlabel('top')
        plt.ylabel('RMSE')
        web = load(web_name)
        best = {'top': None, 'RMSE': 2, 'adapter': None}
        for adapter_kind in adapter_kinds:
            y = _rmse_over_tops(test_set, original_ratings, web, temp_tops,
                                web_name, adapter_kind)
            if min(y) < best['RMSE']:
                best = {'adapter': adapter_kind, 'RMSE': min(y),
                        'top': temp_tops[y.index(min(y))]}
            logging.info('%s:%s', adapter_kind, y)
            plt.plot(temp_tops, y, marker='*', label=adapter_kind)
        plt.legend()
        plt.savefig('top-RMSE-adapters.jpg')
        logging.info('best:%s', best)
    elif web_name != 'all' and adapter_kind != 'all':
        logging.info(RMSE(test_set, original_ratings, load(web_name), top, adapter_kind))
    else:
        raise ValueError
def main():
    """Entry point: k-corate a ratings file (or search for the best k).

    Normalized ratings are cached on disk; the distance map is reused if one
    is already present. With -k 0 the script searches for the best k instead
    of clustering directly.
    """
    parser = argparse.ArgumentParser(
        description='k corating a rating file by a certain web')
    parser.add_argument('-d', '--database', required=True)
    parser.add_argument('-k', type=int, required=True)
    parser.add_argument('-m', '--mode', required=True)
    parser.add_argument('-a', '--analysis', action='store_true')
    parser.add_argument('-t', '--trial', type=int, default=default_trial_times)
    args = parser.parse_args()

    ratings = load(get_ratings_name_from_dataset(args.database))
    trial_times = args.trial
    k = args.k
    mode = args.mode
    need_analysis = args.analysis

    # Reuse the cached normalized matrix when present, otherwise build and
    # persist it. NOTE(review): the cache is probed/written with a '.csv'
    # suffix but read back without one — presumably load() appends it; verify.
    cache_name = 'nratings_' + args.database
    if os.path.exists(cache_name + '.csv'):
        normalized = load(cache_name)
    else:
        normalized = np.zeros(ratings.shape, dtype=int)
        for row in range(ratings.shape[0]):
            normalized[row, :] = normalize(ratings[row, :])
        dump('nratings_%s.csv' % args.database, normalized)
    Cluster.normalized_ratings = normalized

    dis_map = load('dis_map.csv') if os.path.exists('dis_map.csv') else None

    if k:
        # Fixed k: cluster once with the best initial seeds found.
        seeds = get_best_initial_seeds(ratings, normalized, k, mode, dis_map,
                                       trial_times)
        clusters = k_means(seeds)
        dump_clusters(clusters)
        if need_analysis:
            analysis_of_clusters(clusters)
    else:
        # k == 0: sweep modes to find the best k instead.
        modes = ['density', 'dsort', 'rsort', 'random'] if mode == 'all' else [mode]
        find_best_k(ratings, normalized, modes, dis_map, trial_times)
def k_corated(webname):
    """Produce and persist the k-corated rating matrix plus its index
    translator for `webname`.

    Reads module-level state (cluster_flag, data_set, k, original_ratings)
    and sets the global `sorted_ratings` as a side effect; results are
    written to disk via dump().
    """
    global sorted_ratings
    if cluster_flag:
        clusters = load_clusters(load('nratings_' + data_set), k)
        sorted_ratings = sort_through_clusters(original_ratings, clusters)
        # BUG FIX: the result variable was named `k_corated`, shadowing this
        # very function; renamed to `corated`.
        corated, index_translator = corating_all_through_clusters(
            sorted_ratings.copy(), clusters)
    else:
        sorted_ratings = sort(original_ratings)
        corated, index_translator = k_corating_all(
            sorted_ratings.copy(), k)
    k_file_name = get_k_corated_name_by_attr(data_set, k, webname)
    index_file_name = get_k_corated_index_by_attr(data_set, k, webname)
    dump(k_file_name, corated)
    dump(index_file_name, index_translator)
if i != 0: index = int(i) - 1 count[index] += 1 return count all_count = [0] * rating_scale items_count = [] item_size = original_ratings.shape[1] for i in range(item_size): item = original_ratings[:, i] temp_count = get_count(item) items_count.append(temp_count) all_count = list(np.array(all_count) + np.array(temp_count)) # for i in range(rating_scale): # plt.figure() # label = 'portion of %s' % (i + 1) # x = [i for i in range(item_size)] # y = [temp_count[i] / sum(temp_count) for temp_count in items_count] # plt.plot(x, y, label=label) # plt.legend() # plt.savefig('items_portion_%s.jpg' % (i + 1)) logging.info([i / sum(all_count) for i in all_count]) if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('-r', '--ratings', required=True) args = parser.parse_args() original_ratings = load(args.ratings) ratia_analysis(original_ratings)
def init(dataset, web_name):
    """Populate the module-level globals from disk: the original ratings
    matrix for `dataset` and the web stored under `web_name`."""
    global original_ratings, web
    ratings_file = get_ratings_name_from_dataset(dataset)
    original_ratings = load(ratings_file)
    web = load(web_name)