Пример #1
0
def user_features():
    """Build one feature vector per user from ``ml-1m/users.dat``.

    Every line of the file is split on ``"::"``.  Field 1 is passed to
    ``judge_sex`` and field 2 (converted to ``int``) to ``judge_age``; the two
    results are concatenated with ``+`` and field 3 (converted to ``int``) is
    appended as the final element.
    NOTE(review): fields 1-3 are presumably gender, age and occupation as in
    the MovieLens 1M layout -- confirm against the data file.

    Returns:
        dict mapping field 0 of each line (kept as a string) to its feature
        vector.  If an id occurs more than once, the first vector is kept.
    """
    users_path = base_path + "ml-1m/users.dat"
    features_by_user = {}
    for record in loadfile(users_path, encoding="ISO-8859-1"):
        fields = record.split("::")
        vector = judge_sex(fields[1]) + judge_age(int(fields[2]))
        vector = vector + [int(fields[3])]
        # Keep the first vector seen for an id; later duplicates are ignored.
        if fields[0] not in features_by_user:
            features_by_user[fields[0]] = vector
    return features_by_user
Пример #2
0
def user_item_rating():
    """Group the lines of ``ml-1m/ratings.dat`` by their first field.

    Every line is split on ``"::"``.  For each distinct field 0 two parallel
    lists are maintained: the first collects field 1 and the second collects
    field 2, both in file order and both kept as strings.
    NOTE(review): the fields are presumably user id, item id and rating --
    confirm against the data file.

    Returns:
        dict mapping field 0 to ``[field_1_values, field_2_values]``.
    """
    ratings_by_user = {}
    for record in loadfile(base_path + "ml-1m/ratings.dat"):
        fields = record.split("::")
        # setdefault hands back the stored pair of lists, creating it on the
        # first occurrence of this key.
        items, scores = ratings_by_user.setdefault(fields[0], [[], []])
        items.append(fields[1])
        scores.append(fields[2])

    return ratings_by_user
Пример #3
0
def create_user_item_dict():
    """Map every user in ``ml-1m/ratings.dat`` to the movie records they rated.

    The movie records come from ``create_item_dict()``.  Every ratings line is
    split on ``"::"``; field 1 is looked up in the item dictionary and the
    result is appended to the list stored under field 0.  A movie id that is
    missing from the item dictionary contributes ``None`` to the list.

    Returns:
        dict mapping field 0 (a string) to a list of item-dictionary entries,
        in file order.
    """
    movies = create_item_dict()
    ratings_path = base_path + "ml-1m/ratings.dat"
    watched = {}
    for record in loadfile(ratings_path, encoding="ISO-8859-1"):
        fields = record.split("::")
        user_id = fields[0]
        if user_id not in watched:
            watched[user_id] = []
        # .get() yields None for ids absent from the item dictionary.
        watched[user_id].append(movies.get(fields[1]))

    return watched
def load_data(train_rate=1):
    """Randomly split the lines of ``ml-1m/ratings.dat`` into train and test.

    Every line is split on ``"::"`` and one ``random.random()`` draw decides
    where it goes: a draw below ``train_rate`` sends the line to the training
    side, anything else to the test side.  With the default ``train_rate=1``
    every line lands in the training side.

    NOTE(review): the two sides are keyed differently -- training maps field 1
    to a set of field 2 values, test maps field 0 to a set of field 1 values
    (presumably item -> ratings and user -> items).  Confirm this asymmetry is
    intended.

    Args:
        train_rate: threshold compared against ``random.random()``.

    Returns:
        tuple ``(train_items_ratings, test_data)`` of two dicts of sets.
    """
    train_split = {}
    test_split = {}
    for record in loadfile(base_path + "ml-1m/ratings.dat"):
        fields = record.split("::")
        goes_to_train = random.random() < train_rate
        if not goes_to_train:
            test_split.setdefault(fields[0], set()).add(fields[1])
            continue
        train_split.setdefault(fields[1], set()).add(fields[2])

    return train_split, test_split
Пример #5
0
def create_item_dict():
    """Build a lookup of movie records from ``ml-1m/movies.dat``.

    Every line is split on ``"::"``.  Field 1 is run through
    ``remove_punctuation`` to form the title; field 2 is split on ``"|"``,
    each piece has leading/trailing newlines stripped, and the pieces are
    re-joined with single spaces to form the genres string.

    Returns:
        dict mapping field 0 (a string) to ``{'title': ..., 'genres': ...}``.
    """
    movies_path = base_path + "ml-1m/movies.dat"
    catalogue = {}
    for record in loadfile(movies_path, encoding="ISO-8859-1"):
        fields = record.split("::")
        entry = catalogue.setdefault(fields[0], {})
        cleaned_title = remove_punctuation(fields[1])
        genre_names = [name.strip('\n') for name in fields[2].split("|")]
        entry['title'] = cleaned_title
        entry['genres'] = ' '.join(genre_names)

    return catalogue
Пример #6
0
def item_features():
    """Build one feature vector per movie from ``ml-1m/movies.dat``.

    Every line is split on ``"::"``.  The release year is extracted from
    field 1 (the title) and passed to ``judge_year``; field 2 is split on
    ``"|"`` and the resulting genre names are passed to ``judge_genres``.  The
    two results are concatenated with ``+``.

    Returns:
        dict mapping field 0 (a string) to its feature vector.  If an id
        occurs more than once, the first vector is kept.

    Raises:
        IndexError: if a title contains no four-digit number at all.
    """
    item_feature = {}
    for line in loadfile(base_path + "ml-1m/movies.dat",
                         encoding="ISO-8859-1"):
        arr = line.split("::")
        year_label = judge_year(_release_year(arr[1]))
        # create_item_dict strips a trailing newline from each genre, which
        # suggests loadfile may yield unstripped lines; do the same here so
        # the last genre is not handed to judge_genres with "\n" attached.
        genres = [genre.strip('\n') for genre in arr[2].split("|")]
        item_feature_list = year_label + judge_genres(genres)
        item_feature.setdefault(arr[0], item_feature_list)

    return item_feature


def _release_year(title):
    """Return the release year embedded in a movie *title* as an ``int``.

    The parenthesised four-digit number that ends the title is preferred, so
    a title such as ``"2001: A Space Odyssey (1968)"`` yields 1968 rather than
    the 2001 that appears first.  When the title does not end that way, the
    first four-digit run anywhere in the title is used instead.

    Raises:
        IndexError: if the title contains no four-digit run.
    """
    trailing = re.search(r"\((\d{4})\)\s*$", title)
    if trailing is not None:
        return int(trailing.group(1))
    # Fallback for titles without a closing "(YYYY)".
    return int(re.findall(r"\d{4}", title)[0])
Пример #7
0
        item_feature_list = year_label + genres_list
        item_feature.setdefault(arr[0], item_feature_list)

    return item_feature


if __name__ == '__main__':
    count_2000 = 0
    count_1990 = 0
    count_1980 = 0
    count_1970 = 0
    other = 0

    item_feature = {}

    for line in loadfile(base_path + "ml-1m/movies.dat",
                         encoding="ISO-8859-1"):
        arr = line.split("::")
        year = int(re.findall("\d{4}", arr[1])[0])
        year_label = judge_year(year)
        genres = arr[2].split("|")
        genres_list = judge_genres(genres)
        item_feature_list = year_label + genres_list
        item_feature.setdefault(arr[0], item_feature_list)
        if year >= 2000:
            count_2000 += 1
        elif year >= 1990 and year < 2000:
            count_1990 += 1
        elif year >= 1980 and year < 1990:
            count_1980 += 1
        elif year >= 1970 and year < 1980:
            count_1970 += 1