Example #1
def init():
    global weight_mode, sim_threshold, sim_mode, attack_ratings, original_ratings, user_size, item_size, translator, correct, total, eccen
    parser = argparse.ArgumentParser(
        description='a ns attack simulation and statistical analysis')
    parser.add_argument('-r', '--ratings', required=True)
    parser.add_argument('-t', '--total', required=True, type=int)
    parser.add_argument('-c', '--correct', required=True, type=int)
    parser.add_argument('-e', '--eccen', type=float)
    parser.add_argument('-m', '--mode', choices=mode_choices, default='exp')
    parser.add_argument('-w',
                        '--weight',
                        choices=['equal', 'less'],
                        default='less')
    args = parser.parse_args()
    weight_mode = args.weight
    sim_mode = args.mode
    attack_ratings = load(args.ratings)
    dataset = extract_dataset_from_filename(args.ratings)
    original_ratings = load(get_ratings_name_from_dataset(dataset))
    user_size = original_ratings.shape[0]
    item_size = original_ratings.shape[1]
    translator = get_id_translator(args.ratings)
    correct = args.correct
    total = args.total
    eccen = args.eccen
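
Since init() wires the parsed flags straight into module globals, the parser can also be exercised without a shell by handing parse_args an explicit argument list. The file name and numbers below are placeholders chosen only to show the expected types, not values from the project:

args = parser.parse_args([
    '-r', 'attack_ratings.csv',  # ratings file produced by the attack (placeholder name)
    '-t', '200',                 # total, parsed as int
    '-c', '150',                 # correct, parsed as int
    '-e', '0.5',                 # eccen, parsed as float (optional)
])
# args.mode falls back to 'exp' and args.weight to 'less' when -m/-w are omitted.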
Example #2
def main():
    parser = argparse.ArgumentParser(description='RMSE test for a certain dataset')
    parser.add_argument('-d', '--dataset', required=True, choices=dataset_choices)
    parser.add_argument('-w', '--web', required=True)
    parser.add_argument('-t', '--top', type=int)
    parser.add_argument('-a', '--adapter')
    args = parser.parse_args()
    data_set = args.dataset
    web_name = args.web
    top = args.top
    adapter_kind = args.adapter
    temp_tops = [5, 10, 20, 40, 60, 70, 80, 90, 100]
    original_ratings = load(get_ratings_name_from_dataset(data_set))
    test_set = np.loadtxt(directory + data_set + '.test', delimiter='\t')
    if web_name == 'all' and adapter_kind != 'all':
        web_names = get_all_web_files()
        plt.figure()
        plt.xlabel('top')
        plt.ylabel('RMSE')
        best = {'top': None, 'RMSE': 2, 'web': None}
        for web_name in web_names:
            y = []
            web = load(web_name)
            for temp_top in temp_tops:
                logging.info({'web': web_name, 'top': temp_top, 'adapter': adapter_kind})
                count = RMSE(test_set, original_ratings, web, temp_top, adapter_kind)
                y.append(count['RMSE'])
            if min(y) < best['RMSE']:
                best = {'web': web_name, 'RMSE': min(y), 'top': temp_tops[y.index(min(y))]}
            logging.info('%s:%s', web_name, y)
            plt.plot(temp_tops, y, marker='*', label=web_name)
        plt.legend()
        plt.savefig('top-RMSE-webs.jpg')
        logging.info('best:%s', best)
    elif web_name != 'all' and adapter_kind == 'all':
        adapter_kinds = ['int', 'round', 'customize', 'int1']
        plt.figure()
        plt.xlabel('top')
        plt.ylabel('RMSE')
        web = load(web_name)
        best = {'top': None, 'RMSE': 2, 'adapter': None}
        for adapter_kind in adapter_kinds:
            y = []
            for temp_top in temp_tops:
                logging.info({'web': web_name, 'top': temp_top, 'adapter': adapter_kind})
                count = RMSE(test_set, original_ratings, web, temp_top, adapter_kind)
                y.append(count['RMSE'])
            if min(y) < best['RMSE']:
                best = {'adapter': adapter_kind, 'RMSE': min(y), 'top': temp_tops[y.index(min(y))]}
            logging.info('%s:%s', adapter_kind, y)
            plt.plot(temp_tops, y, marker='*', label=adapter_kind)
        plt.legend()
        plt.savefig('top-RMSE-adapters.jpg')
        logging.info('best:%s', best)
    elif web_name != 'all' and adapter_kind != 'all':
        logging.info(RMSE(test_set, original_ratings, load(web_name), top, adapter_kind))
    else:
        raise ValueError
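
The RMSE helper these branches call is not shown. Judging from the call sites (a tab-separated test set of user/item/rating rows, the original rating matrix, a user-similarity web, a neighbour count top, and an adapter_kind), a neighbourhood-style sketch could look like the following; the prediction rule, the index alignment, and the meaning of each adapter kind are assumptions for illustration, not the project's actual code:

import numpy as np

def rmse_sketch(test_set, original_ratings, web, top, adapter_kind):
    # Hypothetical stand-in for RMSE(); returns a dict because the callers
    # above read count['RMSE'].
    squared_errors = []
    for row in test_set:
        user, item, actual = int(row[0]), int(row[1]), int(row[2])
        neighbours = np.argsort(web[user])[::-1][:top]      # most similar users first
        rated = [n for n in neighbours if original_ratings[n, item] != 0]
        if not rated:
            continue
        predicted = float(np.mean([original_ratings[n, item] for n in rated]))
        if adapter_kind == 'round':
            predicted = round(predicted)                     # snap to nearest rating level
        elif adapter_kind in ('int', 'int1'):
            predicted = int(predicted)                       # truncate instead of rounding
        squared_errors.append((predicted - actual) ** 2)
    return {'RMSE': float(np.sqrt(np.mean(squared_errors)))}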
Example #3
def main():
    parser = argparse.ArgumentParser(
        description='k corating a rating file by a certain web')
    parser.add_argument('-d', '--database', required=True)
    parser.add_argument('-k', type=int, required=True)
    parser.add_argument('-m', '--mode', required=True)
    parser.add_argument('-a', '--analysis', action='store_true')
    parser.add_argument('-t', '--trial', type=int, default=default_trial_times)
    args = parser.parse_args()
    original_ratings = load(get_ratings_name_from_dataset(args.database))
    trial_times = args.trial
    k = args.k
    mode = args.mode
    need_analysis = args.analysis
    if os.path.exists('nratings_' + args.database + '.csv'):
        normalized_ratings = load('nratings_' + args.database)
    else:
        normalized_ratings = np.zeros(shape=(original_ratings.shape),
                                      dtype=int)
        for i in range(original_ratings.shape[0]):
            normalized_ratings[i, :] = normalize(original_ratings[i, :])
        dump('nratings_%s.csv' % args.database, normalized_ratings)
    Cluster.normalized_ratings = normalized_ratings
    if os.path.exists('dis_map.csv'):
        dis_map = load('dis_map.csv')
    else:
        dis_map = None
    if k != 0:
        best_seeds = get_best_initial_seeds(original_ratings,
                                            normalized_ratings, k, mode,
                                            dis_map, trial_times)
        best_clusters = k_means(best_seeds)
        dump_clusters(best_clusters)
        if need_analysis:
            analysis_of_clusters(best_clusters)
    else:
        if mode == 'all':
            modes = ['density', 'dsort', 'rsort', 'random']
        else:
            modes = [mode]
        find_best_k(original_ratings, normalized_ratings, modes, dis_map,
                    trial_times)
Example #4
def k_corated(webname):
    global sorted_ratings
    if cluster_flag:
        clusters = load_clusters(load('nratings_' + data_set), k)
        sorted_ratings = sort_through_clusters(original_ratings, clusters)
        k_corated, index_translator = corating_all_through_clusters(
            sorted_ratings.copy(), clusters)
    else:
        sorted_ratings = sort(original_ratings)
        k_corated, index_translator = k_corating_all(
            sorted_ratings.copy(), k)
    k_file_name = get_k_corated_name_by_attr(data_set, k, webname)
    index_file_name = get_k_corated_index_by_attr(data_set, k, webname)
    dump(k_file_name, k_corated)
    dump(index_file_name, index_translator)
Example #5
def ratia_analysis(original_ratings):
    def get_count(item):
        # Per-item histogram of rating values 1..rating_scale (0 = not rated);
        # the signature and these first two lines are assumed, reconstructed
        # from the calls further down.
        count = [0] * rating_scale
        for i in item:
            if i != 0:
                index = int(i) - 1
                count[index] += 1
        return count

    all_count = [0] * rating_scale
    items_count = []
    item_size = original_ratings.shape[1]
    for i in range(item_size):
        item = original_ratings[:, i]
        temp_count = get_count(item)
        items_count.append(temp_count)
        all_count = list(np.array(all_count) + np.array(temp_count))
    # for i in range(rating_scale):
    #     plt.figure()
    #     label = 'portion of %s' % (i + 1)
    #     x = [i for i in range(item_size)]
    #     y = [temp_count[i] / sum(temp_count) for temp_count in items_count]
    #     plt.plot(x, y, label=label)
    #     plt.legend()
    #     plt.savefig('items_portion_%s.jpg' % (i + 1))
    logging.info([i / sum(all_count) for i in all_count])


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-r', '--ratings', required=True)
    args = parser.parse_args()
    original_ratings = load(args.ratings)
    ratia_analysis(original_ratings)
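
To make the logged figure concrete: assuming a 1-to-5 rating scale, each item column is reduced to a five-bucket histogram (zeros are skipped as "not rated"), and the final log line is the overall share of each rating value. The same counting step on a toy column:

import numpy as np

rating_scale = 5                                 # assumed 1..5 scale
column = np.array([0, 5, 4, 4, 0, 3])            # one item's ratings; 0 = not rated
count = [0] * rating_scale
for value in column:
    if value != 0:
        count[int(value) - 1] += 1
print(count)                                     # [0, 0, 1, 2, 1]
print([c / sum(count) for c in count])           # [0.0, 0.0, 0.25, 0.5, 0.25]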
Example #6
def init(dataset, web_name):
    global original_ratings, web
    original_ratings = load(get_ratings_name_from_dataset(dataset))
    web = load(web_name)
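
init only populates module-level state; the functions that follow it in the module are expected to read those globals directly. A hypothetical consumer, added here only to illustrate the pattern (the prediction rule is not from the source):

import numpy as np

def predict(user, item):
    # Assumes init(dataset, web_name) has been called, so `web` is a user-by-user
    # similarity matrix and `original_ratings` a user-by-item rating matrix.
    column = original_ratings[:, item]
    weights = web[user] * (column != 0)          # ignore users who have not rated the item
    if weights.sum() == 0:
        return 0.0
    return float(column @ weights / weights.sum())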